/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
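
/* A sketch of how MODE_INDEX is consumed (the real uses are in the rtx-cost
   code later in this file): the five-element arrays in the cost tables below
   are indexed by it, e.g.

     ix86_cost->mult_init[MODE_INDEX (mode)]
     ix86_cost->divide[MODE_INDEX (mode)]

   so QImode..DImode map to slots 0-3 and any other mode falls into the final
   "other" slot.  The field names here follow the processor_costs declaration
   in the backend headers.  */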

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
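
/* A worked example of the convention above: with COSTS_N_INSNS (N) expanding
   to (N) * 4, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a two-byte add
   under size tuning weighs the same as a one-"insn" add under speed tuning,
   keeping the two cost scales comparable.  */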

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
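
/* How to read the stringop_algs initializers in the tables below (layout per
   the struct stringop_algs declaration elsewhere in the backend): each entry
   is

     {alg_for_unknown_size, {{max_bytes, alg}, ..., {-1, alg}}}

   where each {max_bytes, alg} pair selects an algorithm for blocks of at most
   max_bytes, and a max_bytes of -1 terminates the list while covering all
   larger (and variable-sized) blocks.  Each cost table supplies a pair of
   such entries per operation - memcpy first, then memset - with the first
   member of each pair used for 32-bit code and the second for 64-bit code;
   DUMMY_STRINGOP_ALGS fills a variant that is never consulted on that
   target.  */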

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*				 HI */
   COSTS_N_BYTES (3),			/*				 SI */
   COSTS_N_BYTES (3),			/*				 DI */
   COSTS_N_BYTES (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*			    HI */
   COSTS_N_BYTES (3),			/*			    SI */
   COSTS_N_BYTES (3),			/*			    DI */
   COSTS_N_BYTES (5)},			/*			 other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
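
/* Note that ix86_size_cost is not tied to a particular CPU: the
   option-handling code later in this file is expected to point ix86_cost at
   this table when optimizing for size (e.g. under -Os), so the "costs" above
   encode instruction sizes rather than latencies.  */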

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*				 HI */
   COSTS_N_INSNS (6),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (23),			/*			    SI */
   COSTS_N_INSNS (23),			/*			    DI */
   COSTS_N_INSNS (23)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*				 HI */
   COSTS_N_INSNS (12),			/*				 SI */
   COSTS_N_INSNS (12),			/*				 DI */
   COSTS_N_INSNS (12)},			/*			      other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*			    HI */
   COSTS_N_INSNS (40),			/*			    SI */
   COSTS_N_INSNS (40),			/*			    DI */
   COSTS_N_INSNS (40)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*				 HI */
   COSTS_N_INSNS (11),			/*				 SI */
   COSTS_N_INSNS (11),			/*				 DI */
   COSTS_N_INSNS (11)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*			    HI */
   COSTS_N_INSNS (25),			/*			    SI */
   COSTS_N_INSNS (25),			/*			    DI */
   COSTS_N_INSNS (25)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (4)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*			    HI */
   COSTS_N_INSNS (17),			/*			    SI */
   COSTS_N_INSNS (17),			/*			    DI */
   COSTS_N_INSNS (17)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (7),			/*				 SI */
   COSTS_N_INSNS (7),			/*				 DI */
   COSTS_N_INSNS (7)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (39),			/*			    SI */
   COSTS_N_INSNS (39),			/*			    DI */
   COSTS_N_INSNS (39)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (3),			/*				 DI */
   COSTS_N_INSNS (3)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*			    HI */
   COSTS_N_INSNS (18),			/*			    SI */
   COSTS_N_INSNS (18),			/*			    DI */
   COSTS_N_INSNS (18)},			/*			 other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*				 HI */
   COSTS_N_INSNS (5),			/*				 SI */
   COSTS_N_INSNS (5),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  64,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*				 HI */
   COSTS_N_INSNS (15),			/*				 SI */
   COSTS_N_INSNS (15),			/*				 DI */
   COSTS_N_INSNS (15)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*			    HI */
   COSTS_N_INSNS (56),			/*			    SI */
   COSTS_N_INSNS (56),			/*			    DI */
   COSTS_N_INSNS (56)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/*				 HI */
   COSTS_N_INSNS (10),			/*				 SI */
   COSTS_N_INSNS (10),			/*				 DI */
   COSTS_N_INSNS (10)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/*			    HI */
   COSTS_N_INSNS (66),			/*			    SI */
   COSTS_N_INSNS (66),			/*			    DI */
   COSTS_N_INSNS (66)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (3),			/*				 DI */
   COSTS_N_INSNS (3)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/*			    HI */
   COSTS_N_INSNS (22),			/*			    SI */
   COSTS_N_INSNS (22),			/*			    DI */
   COSTS_N_INSNS (22)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  16,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
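
/* A sketch of how the default above gets replaced (the actual logic lives in
   the option-override code later in this file; processor_target_table is
   assumed here to map each PROCESSOR_* value to its cost table):

     if (optimize_size)
       ix86_cost = &ix86_size_cost;
     else
       ix86_cost = processor_target_table[ix86_tune].cost;  */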

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
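
/* A sketch of how the mask table below is unpacked into ix86_tune_features
   by the option-override code (ix86_tune_mask is assumed to be computed as
   1u << ix86_tune for the CPU selected by -mtune):

     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */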
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4-based chips, which treat 128-bit
1509 SSE registers as single units, and K8-based chips, which split SSE
1510 registers into two 64-bit halves. This knob promotes all store destinations
1511 to 128 bits to allow register renaming on 128-bit SSE units, but usually
1512 results in one extra micro-op on 64-bit SSE units. Experimental results
1513 show that disabling this option on P4 causes over a 20% SPECfp regression,
1514 while enabling it on K8 causes roughly a 2.4% regression that can be partly
1515 masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format, leaving
1531 the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in a 16-byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1593 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and a memory
1597 operand is vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is vector path on AMD machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a MOV. */
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 though XOR is one byte longer. */
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1613 operand that cannot be represented using a modRM byte. The XOR
1614 replacement is long decoded, so this split helps here as well. */
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector instructions. */
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* In case the average insn count for a single function invocation is
1672 lower than this constant, emit a fast (but longer) prologue and epilogue. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1676 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative numbers:
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
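/* These maps are indexed by gcc register number and consumed by the
   debug-info macros in i386.h; a sketch of the likely shape (hypothetical
   here, check i386.h for the real definition):  */
#if 0
#define DBX_REGISTER_NUMBER(N) \
  (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])
#endif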
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
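/* A minimal sketch (illustrative only, not part of the build) of how the
   two parameter-register tables differ per calling ABI: the n-th integer
   argument of a call lands in the n-th entry of the table selected by the
   callee's ABI.  The helper name is hypothetical.  */
#if 0
static int
nth_int_parameter_register (enum calling_abi abi, int n)
{
  /* MS_ABI passes integers in rcx, rdx, r8, r9; SYSV_ABI uses
     rdi, rsi, rdx, rcx, r8, r9.  */
  if (abi == MS_ABI)
    return n < 4 ? x86_64_ms_abi_int_parameter_registers[n] : -1;
  return n < 6 ? x86_64_int_parameter_registers[n] : -1;
}
#endif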
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1835 saved static chain if ix86_static_chain_on_stack
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1844 <- sse_regs_save_offset
1847 [va_arg registers] |
1851 [padding2] | = to_allocate
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which CPU we are scheduling for. */
1886 enum attr_cpu ix86_schedule;
1888 /* Which CPU we are optimizing for. */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* Nonzero if the SSE prefetch instruction is not a NOOP. */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 static const char ix86_force_align_arg_pointer_string[]
1902 = "force_align_arg_pointer";
1904 static rtx (*ix86_gen_leave) (void);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1918 /* Alignment for incoming stack boundary in bits, as specified on the command line. */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1928 /* The ABI used by the target. */
1929 enum calling_abi ix86_abi;
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1934 /* Calling-ABI-specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1941 int ix86_section_threshold = 65536;
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1947 /* Fence to use after loop using movnt. */
1950 /* Register class used for passing a given 64-bit part of the argument.
1951 These represent classes as documented by the PS ABI, with the exception
1952 of the SSESF and SSEDF classes, which are basically the SSE class, except
1953 that gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1955 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1956 whenever possible (the upper half does contain padding). */
1957 enum x86_64_reg_class
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1972 #define MAX_CLASSES 4
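/* Worked example of the classification (per the x86-64 psABI): for
   struct { double d; long l; } the first eightbyte is classified as
   X86_64_SSEDF_CLASS (passed in an XMM register, moved in DFmode) and
   the second as X86_64_INTEGER_CLASS (passed in a GPR), so the struct
   occupies two of the MAX_CLASSES slots.  */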
1974 /* Table of constants used by fldpi, fldln2, etc. */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1990 static void predict_jump (int);
1991 static unsigned int split_stack_prologue_scratch_regno (void);
1992 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1994 enum ix86_function_specific_strings
1996 IX86_FUNCTION_SPECIFIC_ARCH,
1997 IX86_FUNCTION_SPECIFIC_TUNE,
1998 IX86_FUNCTION_SPECIFIC_FPMATH,
1999 IX86_FUNCTION_SPECIFIC_MAX
2002 static char *ix86_target_string (int, int, const char *, const char *,
2003 const char *, bool);
2004 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2005 static void ix86_function_specific_save (struct cl_target_option *);
2006 static void ix86_function_specific_restore (struct cl_target_option *);
2007 static void ix86_function_specific_print (FILE *, int,
2008 struct cl_target_option *);
2009 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2010 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2011 static bool ix86_can_inline_p (tree, tree);
2012 static void ix86_set_current_function (tree);
2013 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2015 static enum calling_abi ix86_function_abi (const_tree);
2018 #ifndef SUBTARGET32_DEFAULT_CPU
2019 #define SUBTARGET32_DEFAULT_CPU "i386"
2022 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
2024 #ifndef DEFAULT_PCC_STRUCT_RETURN
2025 #define DEFAULT_PCC_STRUCT_RETURN 1
2028 /* Whether -mtune= or -march= were specified */
2029 static int ix86_tune_defaulted;
2030 static int ix86_arch_specified;
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2059 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2073 /* AES and PCLMUL need SSE2 because they use XMM registers. */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET )
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2121 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same as -mno-sse4.1. */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
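/* How the SET/UNSET pairs are applied (this mirrors the handlers in
   ix86_handle_option below): enabling an ISA ORs in its SET mask, which
   pulls in prerequisites; disabling ANDs out its UNSET mask, which
   cascades to everything that depends on it.  For example:

       -msse3:     ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
                   (enables SSE3, SSE2 and SSE)
       -mno-sse2:  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
                   (disables SSE2 and, transitively, SSE3, SSSE3, SSE4A,
                    SSE4.1, SSE4.2, AVX and the AVX-dependent ISAs)  */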
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2152 /* Processor target table, indexed by processor number */
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
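/* The table is indexed by the selected -mtune processor.  Worked example:
   with -mtune=core2 the override code further down picks
   ix86_cost = &core2_cost and defaults align_loops, align_jumps and
   align_functions to 16 with a max skip of 10 (the core2 row above).  */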
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2210 /* Return true if a red-zone is in use. */
2213 ix86_using_red_zone (void)
2215 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2218 /* Implement TARGET_HANDLE_OPTION. */
2221 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2228 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2257 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2262 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2270 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2275 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2288 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2301 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2309 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2314 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2322 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2327 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2335 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2340 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2348 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2353 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2364 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2376 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2384 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2389 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2397 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2402 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2410 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2415 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2423 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2428 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2436 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2441 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2449 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2454 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2462 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2467 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2475 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2480 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2488 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2493 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2501 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2506 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2514 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2519 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2527 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2532 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2540 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2545 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2553 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2558 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2559 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
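/* Note the pattern above: every handler also records the affected bits in
   ix86_isa_flags_explicit, so the -march= defaulting in
   ix86_option_override_internal will not overrule an explicit user choice.
   E.g. with "-mno-mmx -march=pentium-mmx" the PTA_MMX default is skipped
   (its guard tests ix86_isa_flags_explicit) and MMX stays disabled.  */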
2568 /* Return a string that documents the current -m options. The caller is
2569 responsible for freeing the string. */
2572 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2573 const char *fpmath, bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 int mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2, which imply
2582 preceding options, are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-m64", OPTION_MASK_ISA_64BIT },
2586 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2587 { "-mfma", OPTION_MASK_ISA_FMA },
2588 { "-mxop", OPTION_MASK_ISA_XOP },
2589 { "-mlwp", OPTION_MASK_ISA_LWP },
2590 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2591 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2592 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2593 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2594 { "-msse3", OPTION_MASK_ISA_SSE3 },
2595 { "-msse2", OPTION_MASK_ISA_SSE2 },
2596 { "-msse", OPTION_MASK_ISA_SSE },
2597 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2598 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2599 { "-mmmx", OPTION_MASK_ISA_MMX },
2600 { "-mabm", OPTION_MASK_ISA_ABM },
2601 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2602 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2603 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2604 { "-maes", OPTION_MASK_ISA_AES },
2605 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2606 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2607 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2608 { "-mf16c", OPTION_MASK_ISA_F16C },
2612 static struct ix86_target_opts flag_opts[] =
2614 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2615 { "-m80387", MASK_80387 },
2616 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2617 { "-malign-double", MASK_ALIGN_DOUBLE },
2618 { "-mcld", MASK_CLD },
2619 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2620 { "-mieee-fp", MASK_IEEE_FP },
2621 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2622 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2623 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2624 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2625 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2626 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2627 { "-mno-red-zone", MASK_NO_RED_ZONE },
2628 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2629 { "-mrecip", MASK_RECIP },
2630 { "-mrtd", MASK_RTD },
2631 { "-msseregparm", MASK_SSEREGPARM },
2632 { "-mstack-arg-probe", MASK_STACK_PROBE },
2633 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2634 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2641 char target_other[40];
2650 memset (opts, '\0', sizeof (opts));
2652 /* Add -march= option. */
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2659 /* Add -mtune= option. */
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2666 /* Pick out the ISA options that are set. */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2669 if ((isa & isa_opts[i].mask) != 0)
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2676 if (isa && add_nl_p)
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2685 if ((flags & flag_opts[i].mask) != 0)
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2692 if (flags && add_nl_p)
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2698 /* Add -fpmath= option. */
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2709 gcc_assert (num < ARRAY_SIZE (opts));
2711 /* Size the string. */
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2717 for (j = 0; j < 2; j++)
2719 len += strlen (opts[i][j]);
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2726 for (i = 0; i < num; i++)
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2746 for (j = 0; j < 2; j++)
2749 memcpy (ptr, opts[i][j], len2[j]);
2751 line_len += len2[j];
2756 gcc_assert (ret + len >= ptr);
2761 /* Return TRUE if software prefetching is beneficial for the selected processor. */
2765 software_prefetching_beneficial_p (void)
2769 case PROCESSOR_GEODE:
2771 case PROCESSOR_ATHLON:
2773 case PROCESSOR_AMDFAM10:
2781 /* Return true if profiling code should be emitted before the
2782 prologue; otherwise return false.
2783 Note: for x86 with "hotfix", this is sorried (sorry () is issued). */
2785 ix86_profile_before_prologue (void)
2787 return flag_fentry != 0;
2790 /* Function that is callable from the debugger to print the current
2793 ix86_debug_options (void)
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2801 fprintf (stderr, "%s\n\n", opts);
2805 fputs ("<no options>\n\n", stderr);
2810 /* Override various settings based on options. If MAIN_ARGS_P, the
2811 options are from the command line; otherwise they are from attribute(target). */
2815 ix86_option_override_internal (bool main_args_p)
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2833 PTA_PREFETCH_SSE = 1 << 4,
2835 PTA_3DNOW_A = 1 << 6,
2839 PTA_POPCNT = 1 << 10,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2846 PTA_PCLMUL = 1 << 17,
2849 PTA_MOVBE = 1 << 20,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2865 const processor_alias_table[] =
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX |PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
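/* Worked example: "-march=core2" matches the core2 row above, so
   ix86_arch = PROCESSOR_CORE2 and ix86_schedule = CPU_CORE2, and the
   PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
   | PTA_CX16 flags seed the corresponding default ISA bits in the loop
   further down (each one guarded by ix86_isa_flags_explicit).  */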
2957 /* Set up prefix/suffix so the error messages refer to either the
2958 command-line argument or the attribute(target). */
2967 prefix = "option(\"";
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990 as -mtune=generic. With native compilers we won't see
2991 -mtune=native, as the driver will have already replaced it. */
2992 || !strcmp (ix86_tune_string, "native"))
2995 ix86_tune_string = "generic64";
2997 ix86_tune_string = "generic32";
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3030 ix86_tune_string = "generic64";
3032 ix86_tune_string = "generic32";
3036 if (ix86_stringop_string)
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3062 ix86_arch_specified = 1;
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3076 ix86_abi = DEFAULT_ABI;
3078 if (ix86_cmodel_string != 0)
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3107 ix86_cmodel = CM_32;
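/* Summary of the defaulting above when no -mcmodel= is given:
       64-bit MS ABI:  CM_SMALL_PIC (and flag_pic is forced on)
       other 64-bit:   flag_pic ? CM_SMALL_PIC : CM_SMALL
       32-bit:         CM_32  */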
3109 if (ix86_asm_string != 0)
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
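/* Worked example: with -march=k8, ix86_arch_mask has only the
   PROCESSOR_K8 bit set.  The X86_ARCH_CMOVE initializer,
   ~(m_386 | m_486 | m_PENT | m_K6), has that bit set, so
   ix86_arch_features[X86_ARCH_CMOVE] becomes 1: cmov is available.  */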
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3238 if (ix86_tune_defaulted)
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3249 error ("CPU you selected does not support x86-64 "
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs; so, we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3275 #ifndef USE_X86_64_FRAME_POINTER
3276 #define USE_X86_64_FRAME_POINTER 0
3279 /* Set the default values for switches whose default depends on TARGET_64BIT
3280 in case they weren't overridden by command-line options. */
3283 if (optimize > 1 && !global_options_set.x_flag_zee)
3285 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3286 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3287 if (flag_asynchronous_unwind_tables == 2)
3288 flag_asynchronous_unwind_tables = 1;
3289 if (flag_pcc_struct_return == 2)
3290 flag_pcc_struct_return = 0;
3294 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3295 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3296 if (flag_asynchronous_unwind_tables == 2)
3297 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3298 if (flag_pcc_struct_return == 2)
3299 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3303 ix86_cost = &ix86_size_cost;
3305 ix86_cost = processor_target_table[ix86_tune].cost;
3307 /* Arrange to set up i386_stack_locals for all functions. */
3308 init_machine_status = ix86_init_machine_status;
3310 /* Validate -mregparm= value. */
3311 if (ix86_regparm_string)
3314 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3315 i = atoi (ix86_regparm_string);
3316 if (i < 0 || i > REGPARM_MAX)
3317 error ("%sregparm=%d%s is not between 0 and %d",
3318 prefix, i, suffix, REGPARM_MAX);
3323 ix86_regparm = REGPARM_MAX;
3325 /* If the user has provided any of the -malign-* options,
3326 warn and use that value only if -falign-* is not set.
3327 Remove this code in GCC 3.2 or later. */
3328 if (ix86_align_loops_string)
3330 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3331 prefix, suffix, suffix);
3332 if (align_loops == 0)
3334 i = atoi (ix86_align_loops_string);
3335 if (i < 0 || i > MAX_CODE_ALIGN)
3336 error ("%salign-loops=%d%s is not between 0 and %d",
3337 prefix, i, suffix, MAX_CODE_ALIGN);
3339 align_loops = 1 << i;
3343 if (ix86_align_jumps_string)
3345 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3346 prefix, suffix, suffix);
3347 if (align_jumps == 0)
3349 i = atoi (ix86_align_jumps_string);
3350 if (i < 0 || i > MAX_CODE_ALIGN)
3351 error ("%salign-loops=%d%s is not between 0 and %d",
3352 prefix, i, suffix, MAX_CODE_ALIGN);
3354 align_jumps = 1 << i;
3358 if (ix86_align_funcs_string)
3360 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3361 prefix, suffix, suffix);
3362 if (align_functions == 0)
3364 i = atoi (ix86_align_funcs_string);
3365 if (i < 0 || i > MAX_CODE_ALIGN)
3366 error ("%salign-loops=%d%s is not between 0 and %d",
3367 prefix, i, suffix, MAX_CODE_ALIGN);
3369 align_functions = 1 << i;
3373 /* Default align_* from the processor table. */
3374 if (align_loops == 0)
3376 align_loops = processor_target_table[ix86_tune].align_loop;
3377 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3379 if (align_jumps == 0)
3381 align_jumps = processor_target_table[ix86_tune].align_jump;
3382 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3384 if (align_functions == 0)
3386 align_functions = processor_target_table[ix86_tune].align_func;
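/* Worked example: "-malign-functions=4" yields align_functions
   = 1 << 4 = 16, while with no align options given at all and
   -mtune=core2 the table defaults above give align_loops = 16,
   align_jumps = 16 and align_functions = 16.  */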
3389 /* Validate -mbranch-cost= value, or provide default. */
3390 ix86_branch_cost = ix86_cost->branch_cost;
3391 if (ix86_branch_cost_string)
3393 i = atoi (ix86_branch_cost_string);
3395 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3397 ix86_branch_cost = i;
3399 if (ix86_section_threshold_string)
3401 i = atoi (ix86_section_threshold_string);
3403 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3405 ix86_section_threshold = i;
3408 if (ix86_tls_dialect_string)
3410 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU;
3412 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3413 ix86_tls_dialect = TLS_DIALECT_GNU2;
3415 error ("bad value (%s) for %stls-dialect=%s %s",
3416 ix86_tls_dialect_string, prefix, suffix, sw);
3419 if (ix87_precision_string)
3421 i = atoi (ix87_precision_string);
3422 if (i != 32 && i != 64 && i != 80)
3423 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3428 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3430 /* Enable by default the SSE and MMX builtins. Do allow the user to
3431 explicitly disable any of these. In particular, disabling SSE and
3432 MMX for kernel code is extremely useful. */
3433 if (!ix86_arch_specified)
3435 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3436 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3439 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3443 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3445 if (!ix86_arch_specified)
3447 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3449 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3450 when the programmer takes care to keep the stack from being destroyed. */
3451 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3452 target_flags |= MASK_NO_RED_ZONE;
3455 /* Keep nonleaf frame pointers. */
3456 if (flag_omit_frame_pointer)
3457 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3458 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3459 flag_omit_frame_pointer = 1;
3461 /* If we're doing fast math, we don't care about comparison order
3462 wrt NaNs. This lets us use a shorter comparison sequence. */
3463 if (flag_finite_math_only)
3464 target_flags &= ~MASK_IEEE_FP;
3466 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3467 since the insns won't need emulation. */
3468 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3469 target_flags &= ~MASK_NO_FANCY_MATH_387;
3471 /* Likewise, if the target doesn't have a 387, or we've specified
3472 software floating point, don't use 387 inline intrinsics. */
3474 target_flags |= MASK_NO_FANCY_MATH_387;
3476 /* Turn on MMX builtins for -msse. */
3479 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3480 x86_prefetch_sse = true;
3483 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3484 if (TARGET_SSE4_2 || TARGET_ABM)
3485 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3487 /* Validate -mpreferred-stack-boundary= value or default it to
3488 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3489 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3490 if (ix86_preferred_stack_boundary_string)
3492 i = atoi (ix86_preferred_stack_boundary_string);
3493 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3494 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3495 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3497 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
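/* For illustration: with -mpreferred-stack-boundary=4 the computation
   above yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a
   16-byte aligned stack, which is what aligned SSE memory accesses
   require. */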
3500 /* Set the default value for -mstackrealign. */
3501 if (ix86_force_align_arg_pointer == -1)
3502 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3504 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3506 /* Validate -mincoming-stack-boundary= value or default it to
3507 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3508 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3509 if (ix86_incoming_stack_boundary_string)
3511 i = atoi (ix86_incoming_stack_boundary_string);
3512 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3513 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3514 i, TARGET_64BIT ? 4 : 2);
3517 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3518 ix86_incoming_stack_boundary
3519 = ix86_user_incoming_stack_boundary;
3523 /* Accept -msseregparm only if at least SSE support is enabled. */
3524 if (TARGET_SSEREGPARM
3526 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3528 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3529 if (ix86_fpmath_string != 0)
3531 if (! strcmp (ix86_fpmath_string, "387"))
3532 ix86_fpmath = FPMATH_387;
3533 else if (! strcmp (ix86_fpmath_string, "sse"))
3537 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3538 ix86_fpmath = FPMATH_387;
3541 ix86_fpmath = FPMATH_SSE;
3543 else if (! strcmp (ix86_fpmath_string, "387,sse")
3544 || ! strcmp (ix86_fpmath_string, "387+sse")
3545 || ! strcmp (ix86_fpmath_string, "sse,387")
3546 || ! strcmp (ix86_fpmath_string, "sse+387")
3547 || ! strcmp (ix86_fpmath_string, "both"))
3551 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3552 ix86_fpmath = FPMATH_387;
3554 else if (!TARGET_80387)
3556 warning (0, "387 instruction set disabled, using SSE arithmetic");
3557 ix86_fpmath = FPMATH_SSE;
3560 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3563 error ("bad value (%s) for %sfpmath=%s %s",
3564 ix86_fpmath_string, prefix, suffix, sw);
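/* A sketch of the net effect of the parsing above: "-mfpmath=387"
   selects FPMATH_387; "-mfpmath=sse" selects FPMATH_SSE, falling back
   to 387 with a warning when SSE is disabled; the "both" and
   "sse,387" spellings select FPMATH_SSE | FPMATH_387 when both units
   are available. */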
3567 /* If the i387 is disabled, then do not return values in it. */
3569 target_flags &= ~MASK_FLOAT_RETURNS;
3571 /* Use an external vectorized library for vectorizing intrinsics. */
3572 if (ix86_veclibabi_string)
3574 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_svml;
3576 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3577 ix86_veclib_handler = ix86_veclibabi_acml;
3579 error ("unknown vectorization library ABI type (%s) for "
3580 "%sveclibabi=%s %s", ix86_veclibabi_string,
3581 prefix, suffix, sw);
3584 if ((!USE_IX86_FRAME_POINTER
3585 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3586 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3588 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3590 /* ??? Unwind info is not correct around the CFG unless either a frame
3591 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3592 unwind info generation to be aware of the CFG and propagating states around edges. */
3594 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3595 || flag_exceptions || flag_non_call_exceptions)
3596 && flag_omit_frame_pointer
3597 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3599 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3600 warning (0, "unwind tables currently require either a frame pointer "
3601 "or %saccumulate-outgoing-args%s for correctness",
3603 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3606 /* If stack probes are required, the space used for large function
3607 arguments on the stack must also be probed, so enable
3608 -maccumulate-outgoing-args so this happens in the prologue. */
3609 if (TARGET_STACK_PROBE
3610 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3612 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3613 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3614 "for correctness", prefix, suffix);
3615 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3618 /* For sane SSE instruction set generation we need the fcomi instruction.
3619 It is safe to enable all CMOVE instructions. */
3623 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3626 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3627 p = strchr (internal_label_prefix, 'X');
3628 internal_label_prefix_len = p - internal_label_prefix;
3632 /* When the scheduling description is not available, disable the scheduler pass
3633 so it won't slow down compilation and make x87 code slower. */
3634 if (!TARGET_SCHEDULE)
3635 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3637 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3638 ix86_cost->simultaneous_prefetches,
3639 global_options.x_param_values,
3640 global_options_set.x_param_values);
3641 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3642 global_options.x_param_values,
3643 global_options_set.x_param_values);
3644 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3645 global_options.x_param_values,
3646 global_options_set.x_param_values);
3647 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3648 global_options.x_param_values,
3649 global_options_set.x_param_values);
3651 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3652 if (flag_prefetch_loop_arrays < 0
3655 && software_prefetching_beneficial_p ())
3656 flag_prefetch_loop_arrays = 1;
3658 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3659 can be optimized to ap = __builtin_next_arg (0). */
3660 if (!TARGET_64BIT && !flag_split_stack)
3661 targetm.expand_builtin_va_start = NULL;
3665 ix86_gen_leave = gen_leave_rex64;
3666 ix86_gen_add3 = gen_adddi3;
3667 ix86_gen_sub3 = gen_subdi3;
3668 ix86_gen_sub3_carry = gen_subdi3_carry;
3669 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3670 ix86_gen_monitor = gen_sse3_monitor64;
3671 ix86_gen_andsp = gen_anddi3;
3672 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3673 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3674 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3678 ix86_gen_leave = gen_leave;
3679 ix86_gen_add3 = gen_addsi3;
3680 ix86_gen_sub3 = gen_subsi3;
3681 ix86_gen_sub3_carry = gen_subsi3_carry;
3682 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3683 ix86_gen_monitor = gen_sse3_monitor;
3684 ix86_gen_andsp = gen_andsi3;
3685 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3686 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3687 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
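/* A minimal sketch of why this indirection exists: later expanders can
   emit width-correct RTL without re-testing TARGET_64BIT, e.g.

       emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-allocate)));

   resolves to gen_adddi3 or gen_addsi3 as selected above ("allocate"
   is a hypothetical local used only for this example). */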
3691 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3693 target_flags |= MASK_CLD & ~target_flags_explicit;
3696 if (!TARGET_64BIT && flag_pic)
3698 if (flag_fentry > 0)
3699 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3702 if (flag_fentry < 0)
3704 #if defined(PROFILE_BEFORE_PROLOGUE)
3711 /* Save the initial options in case the user uses function-specific options. */
3713 target_option_default_node = target_option_current_node
3714 = build_target_option_node ();
3717 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3720 ix86_option_override (void)
3722 ix86_option_override_internal (true);
3725 /* Update register usage after having seen the compiler flags. */
3728 ix86_conditional_register_usage (void)
3733 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3735 if (fixed_regs[i] > 1)
3736 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3737 if (call_used_regs[i] > 1)
3738 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3741 /* The PIC register, if it exists, is fixed. */
3742 j = PIC_OFFSET_TABLE_REGNUM;
3743 if (j != INVALID_REGNUM)
3744 fixed_regs[j] = call_used_regs[j] = 1;
3746 /* The MS_ABI changes the set of call-used registers. */
3747 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3749 call_used_regs[SI_REG] = 0;
3750 call_used_regs[DI_REG] = 0;
3751 call_used_regs[XMM6_REG] = 0;
3752 call_used_regs[XMM7_REG] = 0;
3753 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3754 call_used_regs[i] = 0;
3757 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3758 other call-clobbered regs for 64-bit. */
3761 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3763 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3764 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3765 && call_used_regs[i])
3766 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3769 /* If MMX is disabled, squash the registers. */
3771 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3772 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3773 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3775 /* If SSE is disabled, squash the registers. */
3777 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3778 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3779 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3781 /* If the FPU is disabled, squash the registers. */
3782 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3783 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3784 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3785 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3787 /* If 32-bit, squash the 64-bit registers. */
3790 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3792 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3798 /* Save the current options */
3801 ix86_function_specific_save (struct cl_target_option *ptr)
3803 ptr->arch = ix86_arch;
3804 ptr->schedule = ix86_schedule;
3805 ptr->tune = ix86_tune;
3806 ptr->fpmath = ix86_fpmath;
3807 ptr->branch_cost = ix86_branch_cost;
3808 ptr->tune_defaulted = ix86_tune_defaulted;
3809 ptr->arch_specified = ix86_arch_specified;
3810 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3811 ptr->ix86_target_flags_explicit = target_flags_explicit;
3813 /* The fields are char but the variables are not; make sure the
3814 values fit in the fields. */
3815 gcc_assert (ptr->arch == ix86_arch);
3816 gcc_assert (ptr->schedule == ix86_schedule);
3817 gcc_assert (ptr->tune == ix86_tune);
3818 gcc_assert (ptr->fpmath == ix86_fpmath);
3819 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3822 /* Restore the current options */
3825 ix86_function_specific_restore (struct cl_target_option *ptr)
3827 enum processor_type old_tune = ix86_tune;
3828 enum processor_type old_arch = ix86_arch;
3829 unsigned int ix86_arch_mask, ix86_tune_mask;
3832 ix86_arch = (enum processor_type) ptr->arch;
3833 ix86_schedule = (enum attr_cpu) ptr->schedule;
3834 ix86_tune = (enum processor_type) ptr->tune;
3835 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3836 ix86_branch_cost = ptr->branch_cost;
3837 ix86_tune_defaulted = ptr->tune_defaulted;
3838 ix86_arch_specified = ptr->arch_specified;
3839 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3840 target_flags_explicit = ptr->ix86_target_flags_explicit;
3842 /* Recreate the arch feature tests if the arch changed */
3843 if (old_arch != ix86_arch)
3845 ix86_arch_mask = 1u << ix86_arch;
3846 for (i = 0; i < X86_ARCH_LAST; ++i)
3847 ix86_arch_features[i]
3848 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3851 /* Recreate the tune optimization tests */
3852 if (old_tune != ix86_tune)
3854 ix86_tune_mask = 1u << ix86_tune;
3855 for (i = 0; i < X86_TUNE_LAST; ++i)
3856 ix86_tune_features[i]
3857 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3861 /* Print the current options */
3864 ix86_function_specific_print (FILE *file, int indent,
3865 struct cl_target_option *ptr)
3868 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3869 NULL, NULL, NULL, false);
3871 fprintf (file, "%*sarch = %d (%s)\n",
3874 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3875 ? cpu_names[ptr->arch]
3878 fprintf (file, "%*stune = %d (%s)\n",
3881 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3882 ? cpu_names[ptr->tune]
3885 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3886 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3887 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3888 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3892 fprintf (file, "%*s%s\n", indent, "", target_string);
3893 free (target_string);
3898 /* Inner function to process the attribute((target(...))); take an argument and
3899 set the current options from the argument. If we have a list, recursively go over the list. */
3903 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3908 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3909 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3910 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3911 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3926 enum ix86_opt_type type;
3931 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3932 IX86_ATTR_ISA ("abm", OPT_mabm),
3933 IX86_ATTR_ISA ("aes", OPT_maes),
3934 IX86_ATTR_ISA ("avx", OPT_mavx),
3935 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3936 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3937 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3938 IX86_ATTR_ISA ("sse", OPT_msse),
3939 IX86_ATTR_ISA ("sse2", OPT_msse2),
3940 IX86_ATTR_ISA ("sse3", OPT_msse3),
3941 IX86_ATTR_ISA ("sse4", OPT_msse4),
3942 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3943 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3944 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3945 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3946 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3947 IX86_ATTR_ISA ("xop", OPT_mxop),
3948 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3949 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3950 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3951 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3953 /* string options */
3954 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3955 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3956 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3959 IX86_ATTR_YES ("cld",
3963 IX86_ATTR_NO ("fancy-math-387",
3964 OPT_mfancy_math_387,
3965 MASK_NO_FANCY_MATH_387),
3967 IX86_ATTR_YES ("ieee-fp",
3971 IX86_ATTR_YES ("inline-all-stringops",
3972 OPT_minline_all_stringops,
3973 MASK_INLINE_ALL_STRINGOPS),
3975 IX86_ATTR_YES ("inline-stringops-dynamically",
3976 OPT_minline_stringops_dynamically,
3977 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3979 IX86_ATTR_NO ("align-stringops",
3980 OPT_mno_align_stringops,
3981 MASK_NO_ALIGN_STRINGOPS),
3983 IX86_ATTR_YES ("recip",
3989 /* If this is a list, recurse to get the options. */
3990 if (TREE_CODE (args) == TREE_LIST)
3994 for (; args; args = TREE_CHAIN (args))
3995 if (TREE_VALUE (args)
3996 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4002 else if (TREE_CODE (args) != STRING_CST)
4005 /* Handle multiple arguments separated by commas. */
4006 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4008 while (next_optstr && *next_optstr != '\0')
4010 char *p = next_optstr;
4012 char *comma = strchr (next_optstr, ',');
4013 const char *opt_string;
4014 size_t len, opt_len;
4019 enum ix86_opt_type type = ix86_opt_unknown;
4025 len = comma - next_optstr;
4026 next_optstr = comma + 1;
4034 /* Recognize no-xxx. */
4035 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4044 /* Find the option. */
4047 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4049 type = attrs[i].type;
4050 opt_len = attrs[i].len;
4051 if (ch == attrs[i].string[0]
4052 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4053 && memcmp (p, attrs[i].string, opt_len) == 0)
4056 mask = attrs[i].mask;
4057 opt_string = attrs[i].string;
4062 /* Process the option. */
4065 error ("attribute(target(\"%s\")) is unknown", orig_p);
4069 else if (type == ix86_opt_isa)
4070 ix86_handle_option (opt, p, opt_set_p);
4072 else if (type == ix86_opt_yes || type == ix86_opt_no)
4074 if (type == ix86_opt_no)
4075 opt_set_p = !opt_set_p;
4078 target_flags |= mask;
4080 target_flags &= ~mask;
4083 else if (type == ix86_opt_str)
4087 error ("option(\"%s\") was already specified", opt_string);
4091 p_strings[opt] = xstrdup (p + opt_len);
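/* For illustration, an attribute string such as

       __attribute__((target ("sse4.2,no-fancy-math-387")))

   is split at the comma above; "sse4.2" matches an IX86_ATTR_ISA entry
   and is routed through ix86_handle_option, while the "no-" prefix on
   the second item flips opt_set_p so its mask is cleared rather than
   set. */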
4101 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4104 ix86_valid_target_attribute_tree (tree args)
4106 const char *orig_arch_string = ix86_arch_string;
4107 const char *orig_tune_string = ix86_tune_string;
4108 const char *orig_fpmath_string = ix86_fpmath_string;
4109 int orig_tune_defaulted = ix86_tune_defaulted;
4110 int orig_arch_specified = ix86_arch_specified;
4111 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4114 struct cl_target_option *def
4115 = TREE_TARGET_OPTION (target_option_default_node);
4117 /* Process each of the options on the chain. */
4118 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4121 /* If the changed options are different from the default, rerun
4122 ix86_option_override_internal, and then save the options away.
4123 The string options are attribute options, and will be undone
4124 when we copy the save structure. */
4125 if (ix86_isa_flags != def->x_ix86_isa_flags
4126 || target_flags != def->x_target_flags
4127 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4128 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4129 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4131 /* If we are using the default tune= or arch=, undo the string assigned,
4132 and use the default. */
4133 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4134 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4135 else if (!orig_arch_specified)
4136 ix86_arch_string = NULL;
4138 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4139 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4140 else if (orig_tune_defaulted)
4141 ix86_tune_string = NULL;
4143 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4144 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4145 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4146 else if (!TARGET_64BIT && TARGET_SSE)
4147 ix86_fpmath_string = "sse,387";
4149 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4150 ix86_option_override_internal (false);
4152 /* Add any builtin functions for the new ISA, if any. */
4153 ix86_add_new_builtins (ix86_isa_flags);
4155 /* Save the current options unless we are validating options for #pragma. */
4157 t = build_target_option_node ();
4159 ix86_arch_string = orig_arch_string;
4160 ix86_tune_string = orig_tune_string;
4161 ix86_fpmath_string = orig_fpmath_string;
4163 /* Free up memory allocated to hold the strings */
4164 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4165 if (option_strings[i])
4166 free (option_strings[i]);
4172 /* Hook to validate attribute((target("string"))). */
4175 ix86_valid_target_attribute_p (tree fndecl,
4176 tree ARG_UNUSED (name),
4178 int ARG_UNUSED (flags))
4180 struct cl_target_option cur_target;
4182 tree old_optimize = build_optimization_node ();
4183 tree new_target, new_optimize;
4184 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4186 /* If the function changed the optimization levels as well as setting target
4187 options, start with the optimizations specified. */
4188 if (func_optimize && func_optimize != old_optimize)
4189 cl_optimization_restore (&global_options,
4190 TREE_OPTIMIZATION (func_optimize));
4192 /* The target attributes may also change some optimization flags, so update
4193 the optimization options if necessary. */
4194 cl_target_option_save (&cur_target, &global_options);
4195 new_target = ix86_valid_target_attribute_tree (args);
4196 new_optimize = build_optimization_node ();
4203 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4205 if (old_optimize != new_optimize)
4206 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4209 cl_target_option_restore (&global_options, &cur_target);
4211 if (old_optimize != new_optimize)
4212 cl_optimization_restore (&global_options,
4213 TREE_OPTIMIZATION (old_optimize));
4219 /* Hook to determine if one function can safely inline another. */
4222 ix86_can_inline_p (tree caller, tree callee)
4225 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4226 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4228 /* If callee has no option attributes, then it is ok to inline. */
4232 /* If caller has no option attributes, but callee does, then it is not ok to inline. */
4234 else if (!caller_tree)
4239 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4240 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4242 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4243 can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4245 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4246 != callee_opts->x_ix86_isa_flags)
4249 /* See if we have the same non-isa options. */
4250 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4253 /* See if arch, tune, etc. are the same. */
4254 else if (caller_opts->arch != callee_opts->arch)
4257 else if (caller_opts->tune != callee_opts->tune)
4260 else if (caller_opts->fpmath != callee_opts->fpmath)
4263 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4274 /* Remember the last target of ix86_set_current_function. */
4275 static GTY(()) tree ix86_previous_fndecl;
4277 /* Establish appropriate back-end context for processing the function
4278 FNDECL. The argument might be NULL to indicate processing at top
4279 level, outside of any function scope. */
4281 ix86_set_current_function (tree fndecl)
4283 /* Only change the context if the function changes. This hook is called
4284 several times in the course of compiling a function, and we don't want to
4285 slow things down too much or call target_reinit when it isn't safe. */
4286 if (fndecl && fndecl != ix86_previous_fndecl)
4288 tree old_tree = (ix86_previous_fndecl
4289 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4292 tree new_tree = (fndecl
4293 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4296 ix86_previous_fndecl = fndecl;
4297 if (old_tree == new_tree)
4302 cl_target_option_restore (&global_options,
4303 TREE_TARGET_OPTION (new_tree));
4309 struct cl_target_option *def
4310 = TREE_TARGET_OPTION (target_option_current_node);
4312 cl_target_option_restore (&global_options, def);
4319 /* Return true if this goes in large data/bss. */
4322 ix86_in_large_data_p (tree exp)
4324 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4327 /* Functions are never large data. */
4328 if (TREE_CODE (exp) == FUNCTION_DECL)
4331 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4333 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4334 if (strcmp (section, ".ldata") == 0
4335 || strcmp (section, ".lbss") == 0)
4341 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4343 /* If this is an incomplete type with size 0, then we can't put it
4344 in data because it might be too big when completed. */
4345 if (!size || size > ix86_section_threshold)
4352 /* Switch to the appropriate section for output of DECL.
4353 DECL is either a `VAR_DECL' node or a constant of some sort.
4354 RELOC indicates whether forming the initial value of DECL requires
4355 link-time relocations. */
4357 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4361 x86_64_elf_select_section (tree decl, int reloc,
4362 unsigned HOST_WIDE_INT align)
4364 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4365 && ix86_in_large_data_p (decl))
4367 const char *sname = NULL;
4368 unsigned int flags = SECTION_WRITE;
4369 switch (categorize_decl_for_section (decl, reloc))
4374 case SECCAT_DATA_REL:
4375 sname = ".ldata.rel";
4377 case SECCAT_DATA_REL_LOCAL:
4378 sname = ".ldata.rel.local";
4380 case SECCAT_DATA_REL_RO:
4381 sname = ".ldata.rel.ro";
4383 case SECCAT_DATA_REL_RO_LOCAL:
4384 sname = ".ldata.rel.ro.local";
4388 flags |= SECTION_BSS;
4391 case SECCAT_RODATA_MERGE_STR:
4392 case SECCAT_RODATA_MERGE_STR_INIT:
4393 case SECCAT_RODATA_MERGE_CONST:
4397 case SECCAT_SRODATA:
4404 /* We don't split these for medium model. Place them into
4405 default sections and hope for the best. */
4410 /* We might get called with string constants, but get_named_section
4411 doesn't like them as they are not DECLs. Also, we need to set
4412 flags in that case. */
4414 return get_section (sname, flags, NULL);
4415 return get_named_section (decl, sname, reloc);
4418 return default_elf_select_section (decl, reloc, align);
4421 /* Build up a unique section name, expressed as a
4422 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4423 RELOC indicates whether the initial value of EXP requires
4424 link-time relocations. */
4426 static void ATTRIBUTE_UNUSED
4427 x86_64_elf_unique_section (tree decl, int reloc)
4429 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4430 && ix86_in_large_data_p (decl))
4432 const char *prefix = NULL;
4433 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4434 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4436 switch (categorize_decl_for_section (decl, reloc))
4439 case SECCAT_DATA_REL:
4440 case SECCAT_DATA_REL_LOCAL:
4441 case SECCAT_DATA_REL_RO:
4442 case SECCAT_DATA_REL_RO_LOCAL:
4443 prefix = one_only ? ".ld" : ".ldata";
4446 prefix = one_only ? ".lb" : ".lbss";
4449 case SECCAT_RODATA_MERGE_STR:
4450 case SECCAT_RODATA_MERGE_STR_INIT:
4451 case SECCAT_RODATA_MERGE_CONST:
4452 prefix = one_only ? ".lr" : ".lrodata";
4454 case SECCAT_SRODATA:
4461 /* We don't split these for medium model. Place them into
4462 default sections and hope for the best. */
4467 const char *name, *linkonce;
4470 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4471 name = targetm.strip_name_encoding (name);
4473 /* If we're using one_only, then there needs to be a .gnu.linkonce
4474 prefix to the section name. */
4475 linkonce = one_only ? ".gnu.linkonce" : "";
4477 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4479 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4483 default_unique_section (decl, reloc);
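/* For example (with a hypothetical variable name): a medium-model BSS
   object "big_table" is given the section ".lbss.big_table", or
   ".gnu.linkonce.lb.big_table" when one_only is in effect, following
   the prefix table above. */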
4486 #ifdef COMMON_ASM_OP
4487 /* This says how to output assembler code to declare an
4488 uninitialized external linkage data object.
4490 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
4493 x86_elf_aligned_common (FILE *file,
4494 const char *name, unsigned HOST_WIDE_INT size,
4497 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4498 && size > (unsigned int)ix86_section_threshold)
4499 fputs (".largecomm\t", file);
4501 fputs (COMMON_ASM_OP, file);
4502 assemble_name (file, name);
4503 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4504 size, align / BITS_PER_UNIT);
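/* E.g., assuming the default 64 KiB large-data threshold, a
   70000-byte object named "blob" (hypothetical) would be announced as

       .largecomm	blob,70000,32

   for 256-bit (32-byte) alignment, instead of with the usual
   COMMON_ASM_OP directive. */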
4508 /* Utility function for targets to use in implementing
4509 ASM_OUTPUT_ALIGNED_BSS. */
4512 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4513 const char *name, unsigned HOST_WIDE_INT size,
4516 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4517 && size > (unsigned int)ix86_section_threshold)
4518 switch_to_section (get_named_section (decl, ".lbss", 0));
4520 switch_to_section (bss_section);
4521 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4522 #ifdef ASM_DECLARE_OBJECT_NAME
4523 last_assemble_variable_decl = decl;
4524 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4526 /* The standard thing is just to output a label for the object. */
4527 ASM_OUTPUT_LABEL (file, name);
4528 #endif /* ASM_DECLARE_OBJECT_NAME */
4529 ASM_OUTPUT_SKIP (file, size ? size : 1);
4533 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4535 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4536 make the problem with not enough registers even worse. */
4537 #ifdef INSN_SCHEDULING
4539 flag_schedule_insns = 0;
4542 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4543 SUBTARGET_OPTIMIZATION_OPTIONS;
4547 /* Implement TARGET_OPTION_INIT_STRUCT. */
4550 ix86_option_init_struct (struct gcc_options *opts)
4553 /* The Darwin libraries never set errno, so we might as well
4554 avoid calling them when that's the only reason we would. */
4555 opts->x_flag_errno_math = 0;
4557 opts->x_flag_pcc_struct_return = 2;
4558 opts->x_flag_asynchronous_unwind_tables = 2;
4559 opts->x_flag_vect_cost_model = 1;
4562 /* Decide whether we must probe the stack before any space allocation
4563 on this target. It's essentially TARGET_STACK_PROBE except when
4564 -fstack-check causes the stack to be already probed differently. */
4567 ix86_target_stack_probe (void)
4569 /* Do not probe the stack twice if static stack checking is enabled. */
4570 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4573 return TARGET_STACK_PROBE;
4576 /* Decide whether we can make a sibling call to a function. DECL is the
4577 declaration of the function being targeted by the call and EXP is the
4578 CALL_EXPR representing the call. */
4581 ix86_function_ok_for_sibcall (tree decl, tree exp)
4583 tree type, decl_or_type;
4586 /* If we are generating position-independent code, we cannot sibcall
4587 optimize any indirect call, or a direct call to a global function,
4588 as the PLT requires %ebx be live. */
4589 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4592 /* If we need to align the outgoing stack, then sibcalling would
4593 unalign the stack, which may break the called function. */
4594 if (ix86_minimum_incoming_stack_boundary (true)
4595 < PREFERRED_STACK_BOUNDARY)
4600 decl_or_type = decl;
4601 type = TREE_TYPE (decl);
4605 /* We're looking at the CALL_EXPR, we need the type of the function. */
4606 type = CALL_EXPR_FN (exp); /* pointer expression */
4607 type = TREE_TYPE (type); /* pointer type */
4608 type = TREE_TYPE (type); /* function type */
4609 decl_or_type = type;
4612 /* Check that the return value locations are the same. For example,
4613 if we are returning floats on the 80387 register stack, we cannot
4614 make a sibcall from a function that doesn't return a float to a
4615 function that does or, conversely, from a function that does return
4616 a float to a function that doesn't; the necessary stack adjustment
4617 would not be executed. This is also the place we notice
4618 differences in the return value ABI. Note that it is ok for one
4619 of the functions to have void return type as long as the return
4620 value of the other is passed in a register. */
4621 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4622 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4624 if (STACK_REG_P (a) || STACK_REG_P (b))
4626 if (!rtx_equal_p (a, b))
4629 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4631 else if (!rtx_equal_p (a, b))
4636 /* The SYSV ABI has more call-clobbered registers;
4637 disallow sibcalls from MS to SYSV. */
4638 if (cfun->machine->call_abi == MS_ABI
4639 && ix86_function_type_abi (type) == SYSV_ABI)
4644 /* If this call is indirect, we'll need to be able to use a
4645 call-clobbered register for the address of the target function.
4646 Make sure that all such registers are not used for passing
4647 parameters. Note that DLLIMPORT functions are indirect. */
4649 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4651 if (ix86_function_regparm (type, NULL) >= 3)
4653 /* ??? Need to count the actual number of registers to be used,
4654 not the possible number of registers. Fix later. */
4660 /* Otherwise okay. That also includes certain types of indirect calls. */
4664 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4665 and "sseregparm" calling convention attributes;
4666 arguments as in struct attribute_spec.handler. */
4669 ix86_handle_cconv_attribute (tree *node, tree name,
4671 int flags ATTRIBUTE_UNUSED,
4674 if (TREE_CODE (*node) != FUNCTION_TYPE
4675 && TREE_CODE (*node) != METHOD_TYPE
4676 && TREE_CODE (*node) != FIELD_DECL
4677 && TREE_CODE (*node) != TYPE_DECL)
4679 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4681 *no_add_attrs = true;
4685 /* Can combine regparm with all attributes but fastcall. */
4686 if (is_attribute_p ("regparm", name))
4690 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4692 error ("fastcall and regparm attributes are not compatible");
4695 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4697 error ("regparm and thiscall attributes are not compatible");
4700 cst = TREE_VALUE (args);
4701 if (TREE_CODE (cst) != INTEGER_CST)
4703 warning (OPT_Wattributes,
4704 "%qE attribute requires an integer constant argument",
4706 *no_add_attrs = true;
4708 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4710 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4712 *no_add_attrs = true;
4720 /* Do not warn when emulating the MS ABI. */
4721 if ((TREE_CODE (*node) != FUNCTION_TYPE
4722 && TREE_CODE (*node) != METHOD_TYPE)
4723 || ix86_function_type_abi (*node) != MS_ABI)
4724 warning (OPT_Wattributes, "%qE attribute ignored",
4726 *no_add_attrs = true;
4730 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4731 if (is_attribute_p ("fastcall", name))
4733 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4735 error ("fastcall and cdecl attributes are not compatible");
4737 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4739 error ("fastcall and stdcall attributes are not compatible");
4741 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4743 error ("fastcall and regparm attributes are not compatible");
4745 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4747 error ("fastcall and thiscall attributes are not compatible");
4751 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4753 else if (is_attribute_p ("stdcall", name))
4755 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4757 error ("stdcall and cdecl attributes are not compatible");
4759 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4761 error ("stdcall and fastcall attributes are not compatible");
4763 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4765 error ("stdcall and thiscall attributes are not compatible");
4769 /* Can combine cdecl with regparm and sseregparm. */
4770 else if (is_attribute_p ("cdecl", name))
4772 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4774 error ("stdcall and cdecl attributes are not compatible");
4776 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4778 error ("fastcall and cdecl attributes are not compatible");
4780 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4782 error ("cdecl and thiscall attributes are not compatible");
4785 else if (is_attribute_p ("thiscall", name))
4787 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4788 warning (OPT_Wattributes, "%qE attribute is used for a non-class method",
4790 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4792 error ("stdcall and thiscall attributes are not compatible");
4794 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4796 error ("fastcall and thiscall attributes are not compatible");
4798 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4800 error ("cdecl and thiscall attributes are not compatible");
4804 /* Can combine sseregparm with all attributes. */
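/* Illustrative declarations (hypothetical names) against the rules
   above:

       void __attribute__((fastcall, stdcall)) h (int);          // rejected
       void __attribute__((regparm (2), sseregparm)) k (float);  // accepted

   the first combination is diagnosed as incompatible, while the second
   is fine because sseregparm combines with everything. */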
4809 /* Return 0 if the attributes for two types are incompatible, 1 if they
4810 are compatible, and 2 if they are nearly compatible (which causes a
4811 warning to be generated). */
4814 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4816 /* Check for mismatch of non-default calling convention. */
4817 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4819 if (TREE_CODE (type1) != FUNCTION_TYPE
4820 && TREE_CODE (type1) != METHOD_TYPE)
4823 /* Check for mismatched fastcall/regparm types. */
4824 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4825 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4826 || (ix86_function_regparm (type1, NULL)
4827 != ix86_function_regparm (type2, NULL)))
4830 /* Check for mismatched sseregparm types. */
4831 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4832 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4835 /* Check for mismatched thiscall types. */
4836 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4837 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4840 /* Check for mismatched return types (cdecl vs stdcall). */
4841 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4842 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4848 /* Return the regparm value for a function with the indicated TYPE and DECL.
4849 DECL may be NULL when calling function indirectly
4850 or considering a libcall. */
4853 ix86_function_regparm (const_tree type, const_tree decl)
4859 return (ix86_function_type_abi (type) == SYSV_ABI
4860 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4862 regparm = ix86_regparm;
4863 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4866 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4870 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4873 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4876 /* Use register calling convention for local functions when possible. */
4878 && TREE_CODE (decl) == FUNCTION_DECL
4880 && !(profile_flag && !flag_fentry))
4882 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4883 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4886 int local_regparm, globals = 0, regno;
4888 /* Make sure no regparm register is taken by a
4889 fixed register variable. */
4890 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4891 if (fixed_regs[local_regparm])
4894 /* We don't want to use regparm(3) for nested functions as
4895 these use a static chain pointer in the third argument. */
4896 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4899 /* In 32-bit mode save a register for the split stack. */
4900 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4903 /* Each fixed register usage increases register pressure,
4904 so fewer registers should be used for argument passing.
4905 This functionality can be overridden by an explicit regparm value. */
4907 for (regno = 0; regno <= DI_REG; regno++)
4908 if (fixed_regs[regno])
4912 = globals < local_regparm ? local_regparm - globals : 0;
4914 if (local_regparm > regparm)
4915 regparm = local_regparm;
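/* For illustration: on ia32 a declaration such as

       int __attribute__((regparm (3))) f (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx. The fixed-register scan
   above shrinks the local regparm count when registers are pinned,
   e.g. by -ffixed-ecx ("f" is a made-up name). */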
4922 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4923 DFmode (2) arguments in SSE registers for a function with the
4924 indicated TYPE and DECL. DECL may be NULL when calling function
4925 indirectly or considering a libcall. Otherwise return 0. */
4928 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4930 gcc_assert (!TARGET_64BIT);
4932 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4933 by the sseregparm attribute. */
4934 if (TARGET_SSEREGPARM
4935 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4942 error ("calling %qD with attribute sseregparm without "
4943 "SSE/SSE2 enabled", decl);
4945 error ("calling %qT with attribute sseregparm without "
4946 "SSE/SSE2 enabled", type);
4954 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4955 (and DFmode for SSE2) arguments in SSE registers. */
4956 if (decl && TARGET_SSE_MATH && optimize
4957 && !(profile_flag && !flag_fentry))
4959 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4960 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4962 return TARGET_SSE2 ? 2 : 1;
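/* E.g. a local function taking (float, double), compiled with -msse2
   and -mfpmath=sse, yields 2 here, so both its SFmode and DFmode
   arguments travel in SSE registers rather than on the stack. */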
4968 /* Return true if EAX is live at the start of the function. Used by
4969 ix86_expand_prologue to determine if we need special help before
4970 calling allocate_stack_worker. */
4973 ix86_eax_live_at_start_p (void)
4975 /* Cheat. Don't bother working forward from ix86_function_regparm
4976 to the function type to whether an actual argument is located in
4977 eax. Instead just look at cfg info, which is still close enough
4978 to correct at this point. This gives false positives for broken
4979 functions that might use uninitialized data that happens to be
4980 allocated in eax, but who cares? */
4981 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4984 /* Value is the number of bytes of arguments automatically
4985 popped when returning from a subroutine call.
4986 FUNDECL is the declaration node of the function (as a tree),
4987 FUNTYPE is the data type of the function (as a tree),
4988 or for a library call it is an identifier node for the subroutine name.
4989 SIZE is the number of bytes of arguments passed on the stack.
4991 On the 80386, the RTD insn may be used to pop them if the number
4992 of args is fixed, but if the number is variable then the caller
4993 must pop them all. RTD can't be used for library calls now
4994 because the library is compiled with the Unix compiler.
4995 Use of RTD is a selectable option, since it is incompatible with
4996 standard Unix calling sequences. If the option is not selected,
4997 the caller must always pop the args.
4999 The attribute stdcall is equivalent to RTD on a per module basis. */
5002 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5006 /* None of the 64-bit ABIs pop arguments. */
5010 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5012 /* Cdecl functions override -mrtd, and never pop the stack. */
5013 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5015 /* Stdcall and fastcall functions will pop the stack if not variadic. */
5017 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5018 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5019 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5022 if (rtd && ! stdarg_p (funtype))
5026 /* Lose any fake structure return argument if it is passed on the stack. */
5027 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5028 && !KEEP_AGGREGATE_RETURN_POINTER)
5030 int nregs = ix86_function_regparm (funtype, fundecl);
5032 return GET_MODE_SIZE (Pmode);
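/* A concrete sketch (hypothetical declaration):

       void __attribute__((stdcall)) g (int a, int b);

   is not variadic, so the 8 bytes of stack arguments are reported here
   and the callee cleans up with "ret $8"; a plain cdecl function
   yields 0 and leaves the cleanup to the caller. */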
5038 /* Argument support functions. */
5040 /* Return true when register may be used to pass function parameters. */
5042 ix86_function_arg_regno_p (int regno)
5045 const int *parm_regs;
5050 return (regno < REGPARM_MAX
5051 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5053 return (regno < REGPARM_MAX
5054 || (TARGET_MMX && MMX_REGNO_P (regno)
5055 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5056 || (TARGET_SSE && SSE_REGNO_P (regno)
5057 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5062 if (SSE_REGNO_P (regno) && TARGET_SSE)
5067 if (TARGET_SSE && SSE_REGNO_P (regno)
5068 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5072 /* TODO: The function should depend on current function ABI but
5073 builtins.c would need updating then. Therefore we use the default ABI. */
5076 /* RAX is used as hidden argument to va_arg functions. */
5077 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5080 if (ix86_abi == MS_ABI)
5081 parm_regs = x86_64_ms_abi_int_parameter_registers;
5083 parm_regs = x86_64_int_parameter_registers;
5084 for (i = 0; i < (ix86_abi == MS_ABI
5085 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5086 if (regno == parm_regs[i])
5091 /* Return true if we do not know how to pass TYPE solely in registers. */
5094 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5096 if (must_pass_in_stack_var_size_or_pad (mode, type))
5099 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5100 The layout_type routine is crafty and tries to trick us into passing
5101 currently unsupported vector types on the stack by using TImode. */
5102 return (!TARGET_64BIT && mode == TImode
5103 && type && TREE_CODE (type) != VECTOR_TYPE);
5106 /* Return the size, in bytes, of the area reserved for arguments passed
5107 in registers for the function represented by FNDECL, depending on the ABI used. */
5110 ix86_reg_parm_stack_space (const_tree fndecl)
5112 enum calling_abi call_abi = SYSV_ABI;
5113 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5114 call_abi = ix86_function_abi (fndecl);
5116 call_abi = ix86_function_type_abi (fndecl);
5117 if (call_abi == MS_ABI)
5122 /* Returns SYSV_ABI or MS_ABI, depending on fntype, specifying the call ABI used. */
5125 ix86_function_type_abi (const_tree fntype)
5127 if (TARGET_64BIT && fntype != NULL)
5129 enum calling_abi abi = ix86_abi;
5130 if (abi == SYSV_ABI)
5132 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5135 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5143 ix86_function_ms_hook_prologue (const_tree fn)
5145 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5147 if (decl_function_context (fn) != NULL_TREE)
5148 error_at (DECL_SOURCE_LOCATION (fn),
5149 "ms_hook_prologue is not compatible with nested function");
5156 static enum calling_abi
5157 ix86_function_abi (const_tree fndecl)
5161 return ix86_function_type_abi (TREE_TYPE (fndecl));
5164 /* Returns SYSV_ABI or MS_ABI, depending on cfun, specifying the call ABI used. */
5167 ix86_cfun_abi (void)
5169 if (! cfun || ! TARGET_64BIT)
5171 return cfun->machine->call_abi;
5174 /* Write the extra assembler code needed to declare a function properly. */
5177 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5180 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5184 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5185 unsigned int filler_cc = 0xcccccccc;
5187 for (i = 0; i < filler_count; i += 4)
5188 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5191 ASM_OUTPUT_LABEL (asm_out_file, fname);
5193 /* Output magic byte marker, if hot-patch attribute is set. */
5198 /* leaq [%rsp + 0], %rsp */
5199 asm_fprintf (asm_out_file, ASM_BYTE
5200 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5204 /* movl.s %edi, %edi ; push %ebp ; movl.s %esp, %ebp */
5207 asm_fprintf (asm_out_file, ASM_BYTE
5208 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5214 extern void init_regs (void);
5216 /* Implementation of the call ABI switching target hook. The call register
5217 sets specific to FNDECL are set here. See also CONDITIONAL_REGISTER_USAGE
5218 for more details. */
5220 ix86_call_abi_override (const_tree fndecl)
5222 if (fndecl == NULL_TREE)
5223 cfun->machine->call_abi = ix86_abi;
5225 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5228 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
5229 expensive re-initialization of init_regs each time we switch function context, since
5230 this is needed only during RTL expansion. */
5232 ix86_maybe_switch_abi (void)
5235 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5239 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5240 for a call to a function whose data type is FNTYPE.
5241 For a library call, FNTYPE is 0. */
5244 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5245 tree fntype, /* tree ptr for function decl */
5246 rtx libname, /* SYMBOL_REF of library name or 0 */
5249 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5250 memset (cum, 0, sizeof (*cum));
5253 cum->call_abi = ix86_function_abi (fndecl);
5255 cum->call_abi = ix86_function_type_abi (fntype);
5256 /* Set up the number of registers to use for passing arguments. */
5258 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5259 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5260 "or subtarget optimization implying it");
5261 cum->nregs = ix86_regparm;
5264 cum->nregs = (cum->call_abi == SYSV_ABI
5265 ? X86_64_REGPARM_MAX
5266 : X86_64_MS_REGPARM_MAX);
5270 cum->sse_nregs = SSE_REGPARM_MAX;
5273 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5274 ? X86_64_SSE_REGPARM_MAX
5275 : X86_64_MS_SSE_REGPARM_MAX);
5279 cum->mmx_nregs = MMX_REGPARM_MAX;
5280 cum->warn_avx = true;
5281 cum->warn_sse = true;
5282 cum->warn_mmx = true;
5284 /* Because the type might mismatch between caller and callee, we need to
5285 use the actual type of the function for local calls.
5286 FIXME: cgraph_analyze can be told to actually record if function uses
5287 va_start so for local functions maybe_vaarg can be made aggressive
5289 FIXME: once the type system is fixed, we won't need this code anymore. */
5291 fntype = TREE_TYPE (fndecl);
5292 cum->maybe_vaarg = (fntype
5293 ? (!prototype_p (fntype) || stdarg_p (fntype))
5298 /* If there are variable arguments, then we won't pass anything
5299 in registers in 32-bit mode. */
5300 if (stdarg_p (fntype))
5311 /* Use ecx and edx registers if function has fastcall attribute,
5312 else look for regparm information. */
5315 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5318 cum->fastcall = 1; /* Same first register as in fastcall. */
5320 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5326 cum->nregs = ix86_function_regparm (fntype, fndecl);
5329 /* Set up the number of SSE registers used for passing SFmode
5330 and DFmode arguments. Warn for mismatching ABI. */
5331 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5335 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5336 But in the case of vector types, it is some vector mode.
5338 When we have only some of our vector isa extensions enabled, then there
5339 are some modes for which vector_mode_supported_p is false. For these
5340 modes, the generic vector support in gcc will choose some non-vector mode
5341 in order to implement the type. By computing the natural mode, we'll
5342 select the proper ABI location for the operand and not depend on whatever
5343 the middle-end decides to do with these vector types.
5345 The middle-end can't deal with vector types > 16 bytes. In this
5346 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
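/* For example (sketch): given

       typedef float v4sf __attribute__((vector_size (16)));

   with SSE disabled the type is laid out in a non-vector mode, and the
   search below recovers V4SFmode by matching 4 subparts with SFmode
   inner mode, starting from MIN_MODE_VECTOR_FLOAT. */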
5349 static enum machine_mode
5350 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5352 enum machine_mode mode = TYPE_MODE (type);
5354 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5356 HOST_WIDE_INT size = int_size_in_bytes (type);
5357 if ((size == 8 || size == 16 || size == 32)
5358 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5359 && TYPE_VECTOR_SUBPARTS (type) > 1)
5361 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5363 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5364 mode = MIN_MODE_VECTOR_FLOAT;
5366 mode = MIN_MODE_VECTOR_INT;
5368 /* Get the mode which has this inner mode and number of units. */
5369 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5370 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5371 && GET_MODE_INNER (mode) == innermode)
5373 if (size == 32 && !TARGET_AVX)
5375 static bool warnedavx;
5382 warning (0, "AVX vector argument without AVX "
5383 "enabled changes the ABI");
5385 return TYPE_MODE (type);
5398 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5399 this may not agree with the mode that the type system has chosen for the
5400 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5401 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5404 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5409 if (orig_mode != BLKmode)
5410 tmp = gen_rtx_REG (orig_mode, regno);
5413 tmp = gen_rtx_REG (mode, regno);
5414 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5415 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5421 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5422 of this code is to classify each 8 bytes of an incoming argument by the register
5423 class and assign registers accordingly. */
5425 /* Return the union class of CLASS1 and CLASS2.
5426 See the x86-64 PS ABI for details. */
5428 static enum x86_64_reg_class
5429 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5431 /* Rule #1: If both classes are equal, this is the resulting class. */
5432 if (class1 == class2)
5435 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5437 if (class1 == X86_64_NO_CLASS)
5439 if (class2 == X86_64_NO_CLASS)
5442 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5443 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5444 return X86_64_MEMORY_CLASS;
5446 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5447 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5448 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5449 return X86_64_INTEGERSI_CLASS;
5450 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5451 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5452 return X86_64_INTEGER_CLASS;
5454 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
5456 if (class1 == X86_64_X87_CLASS
5457 || class1 == X86_64_X87UP_CLASS
5458 || class1 == X86_64_COMPLEX_X87_CLASS
5459 || class2 == X86_64_X87_CLASS
5460 || class2 == X86_64_X87UP_CLASS
5461 || class2 == X86_64_COMPLEX_X87_CLASS)
5462 return X86_64_MEMORY_CLASS;
5464 /* Rule #6: Otherwise class SSE is used. */
5465 return X86_64_SSE_CLASS;
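/* A worked example of the rules above: merging X86_64_INTEGERSI_CLASS
   with X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS (rule #4),
   while merging X86_64_SSE_CLASS with X86_64_X87_CLASS yields
   X86_64_MEMORY_CLASS (rule #5). */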
5468 /* Classify the argument of type TYPE and mode MODE.
5469 CLASSES will be filled by the register class used to pass each word
5470 of the operand. The number of words is returned. In case the parameter
5471 should be passed in memory, 0 is returned. As a special case for zero
5472 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5474 BIT_OFFSET is used internally for handling records; it specifies the
5475 offset in bits modulo 256, to avoid overflow cases.
5477 See the x86-64 PS ABI for details.
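/* For example, under these rules a 16-byte

       struct s { double d; long l; };

   classifies as { X86_64_SSE_CLASS, X86_64_INTEGER_CLASS }; as the
   first argument of a call it would be passed in %xmm0 and %rdi
   ("struct s" is illustrative, not from this file). */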
5481 classify_argument (enum machine_mode mode, const_tree type,
5482 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5484 HOST_WIDE_INT bytes =
5485 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5486 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5488 /* Variable sized entities are always passed/returned in memory. */
5492 if (mode != VOIDmode
5493 && targetm.calls.must_pass_in_stack (mode, type))
5496 if (type && AGGREGATE_TYPE_P (type))
5500 enum x86_64_reg_class subclasses[MAX_CLASSES];
5502 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5506 for (i = 0; i < words; i++)
5507 classes[i] = X86_64_NO_CLASS;
5509 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5510 signal the memory class, so handle it as a special case. */
5513 classes[0] = X86_64_NO_CLASS;
5517 /* Classify each field of record and merge classes. */
5518 switch (TREE_CODE (type))
5521 /* And now merge the fields of structure. */
5522 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5524 if (TREE_CODE (field) == FIELD_DECL)
5528 if (TREE_TYPE (field) == error_mark_node)
5531 /* Bitfields are always classified as integer. Handle them
5532 early, since later code would consider them to be
5533 misaligned integers. */
5534 if (DECL_BIT_FIELD (field))
5536 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5537 i < ((int_bit_position (field) + (bit_offset % 64))
5538 + tree_low_cst (DECL_SIZE (field), 0)
5541 merge_classes (X86_64_INTEGER_CLASS,
5548 type = TREE_TYPE (field);
5550 /* A flexible array member is ignored. */
5551 if (TYPE_MODE (type) == BLKmode
5552 && TREE_CODE (type) == ARRAY_TYPE
5553 && TYPE_SIZE (type) == NULL_TREE
5554 && TYPE_DOMAIN (type) != NULL_TREE
5555 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5560 if (!warned && warn_psabi)
5563 inform (input_location,
5564 "The ABI of passing struct with"
5565 " a flexible array member has"
5566 " changed in GCC 4.4");
5570 num = classify_argument (TYPE_MODE (type), type,
5572 (int_bit_position (field)
5573 + bit_offset) % 256);
5576 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5577 for (i = 0; i < num && (i + pos) < words; i++)
5579 merge_classes (subclasses[i], classes[i + pos]);
5586 /* Arrays are handled as small records. */
5589 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5590 TREE_TYPE (type), subclasses, bit_offset);
5594 /* The partial classes are now full classes. */
5595 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5596 subclasses[0] = X86_64_SSE_CLASS;
5597 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5598 && !((bit_offset % 64) == 0 && bytes == 4))
5599 subclasses[0] = X86_64_INTEGER_CLASS;
5601 for (i = 0; i < words; i++)
5602 classes[i] = subclasses[i % num];
5607 case QUAL_UNION_TYPE:
5608 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5610 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5612 if (TREE_CODE (field) == FIELD_DECL)
5616 if (TREE_TYPE (field) == error_mark_node)
5619 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5620 TREE_TYPE (field), subclasses,
5624 for (i = 0; i < num; i++)
5625 classes[i] = merge_classes (subclasses[i], classes[i]);
5636 /* When size > 16 bytes, if the first eightbyte isn't
5637 X86_64_SSE_CLASS or any of the others isn't
5638 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5640 if (classes[0] != X86_64_SSE_CLASS)
5643 for (i = 1; i < words; i++)
5644 if (classes[i] != X86_64_SSEUP_CLASS)
5648 /* Final merger cleanup. */
5649 for (i = 0; i < words; i++)
5651 /* If one class is MEMORY, everything should be passed in
5652 memory. */
5653 if (classes[i] == X86_64_MEMORY_CLASS)
5656 /* X86_64_SSEUP_CLASS should always be preceded by
5657 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5658 if (classes[i] == X86_64_SSEUP_CLASS
5659 && classes[i - 1] != X86_64_SSE_CLASS
5660 && classes[i - 1] != X86_64_SSEUP_CLASS)
5662 /* The first one should never be X86_64_SSEUP_CLASS. */
5663 gcc_assert (i != 0);
5664 classes[i] = X86_64_SSE_CLASS;
5667 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5668 everything should be passed in memory. */
5669 if (classes[i] == X86_64_X87UP_CLASS
5670 && (classes[i - 1] != X86_64_X87_CLASS))
5674 /* The first one should never be X86_64_X87UP_CLASS. */
5675 gcc_assert (i != 0);
5676 if (!warned && warn_psabi)
5679 inform (input_location,
5680 "The ABI of passing union with long double"
5681 " has changed in GCC 4.4");
5689 /* Compute the alignment needed.  We align all types to their natural
5690 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5691 if (mode != VOIDmode && mode != BLKmode)
5693 int mode_alignment = GET_MODE_BITSIZE (mode);
5696 mode_alignment = 128;
5697 else if (mode == XCmode)
5698 mode_alignment = 256;
5699 if (COMPLEX_MODE_P (mode))
5700 mode_alignment /= 2;
5701 /* Misaligned fields are always returned in memory. */
5702 if (bit_offset % mode_alignment)
5706 /* For V1xx modes, just use the base mode. */
5707 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5708 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5709 mode = GET_MODE_INNER (mode);
5711 /* Classification of atomic types. */
5716 classes[0] = X86_64_SSE_CLASS;
5719 classes[0] = X86_64_SSE_CLASS;
5720 classes[1] = X86_64_SSEUP_CLASS;
5730 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5734 classes[0] = X86_64_INTEGERSI_CLASS;
5737 else if (size <= 64)
5739 classes[0] = X86_64_INTEGER_CLASS;
5742 else if (size <= 64 + 32)
5744 classes[0] = X86_64_INTEGER_CLASS;
5745 classes[1] = X86_64_INTEGERSI_CLASS;
5748 else if (size <= 64 + 64)
5750 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5758 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5762 /* OImode shouldn't be used directly. */
5767 if (!(bit_offset % 64))
5768 classes[0] = X86_64_SSESF_CLASS;
5770 classes[0] = X86_64_SSE_CLASS;
5773 classes[0] = X86_64_SSEDF_CLASS;
5776 classes[0] = X86_64_X87_CLASS;
5777 classes[1] = X86_64_X87UP_CLASS;
5780 classes[0] = X86_64_SSE_CLASS;
5781 classes[1] = X86_64_SSEUP_CLASS;
5784 classes[0] = X86_64_SSE_CLASS;
5785 if (!(bit_offset % 64))
5791 if (!warned && warn_psabi)
5794 inform (input_location,
5795 "The ABI of passing structure with complex float"
5796 " member has changed in GCC 4.4");
5798 classes[1] = X86_64_SSESF_CLASS;
5802 classes[0] = X86_64_SSEDF_CLASS;
5803 classes[1] = X86_64_SSEDF_CLASS;
5806 classes[0] = X86_64_COMPLEX_X87_CLASS;
5809 /* These modes are larger than 16 bytes. */
5817 classes[0] = X86_64_SSE_CLASS;
5818 classes[1] = X86_64_SSEUP_CLASS;
5819 classes[2] = X86_64_SSEUP_CLASS;
5820 classes[3] = X86_64_SSEUP_CLASS;
5828 classes[0] = X86_64_SSE_CLASS;
5829 classes[1] = X86_64_SSEUP_CLASS;
5837 classes[0] = X86_64_SSE_CLASS;
5843 gcc_assert (VECTOR_MODE_P (mode));
5848 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5850 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5851 classes[0] = X86_64_INTEGERSI_CLASS;
5853 classes[0] = X86_64_INTEGER_CLASS;
5854 classes[1] = X86_64_INTEGER_CLASS;
5855 return 1 + (bytes > 8);
5859 /* Examine the argument and set the number of registers required in each
5860 class.  Return 0 iff the parameter should be passed in memory. */
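/* Informal example of the counting done here: for

     struct s { long l; double d; };

   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns
   nonzero, while a 32-byte struct of doubles yields 0 (memory).  */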
5862 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5863 int *int_nregs, int *sse_nregs)
5865 enum x86_64_reg_class regclass[MAX_CLASSES];
5866 int n = classify_argument (mode, type, regclass, 0);
5872 for (n--; n >= 0; n--)
5873 switch (regclass[n])
5875 case X86_64_INTEGER_CLASS:
5876 case X86_64_INTEGERSI_CLASS:
5879 case X86_64_SSE_CLASS:
5880 case X86_64_SSESF_CLASS:
5881 case X86_64_SSEDF_CLASS:
5884 case X86_64_NO_CLASS:
5885 case X86_64_SSEUP_CLASS:
5887 case X86_64_X87_CLASS:
5888 case X86_64_X87UP_CLASS:
5892 case X86_64_COMPLEX_X87_CLASS:
5893 return in_return ? 2 : 0;
5894 case X86_64_MEMORY_CLASS:
5900 /* Construct a container for the argument as used by the GCC interface.
5901 See FUNCTION_ARG for the detailed description. */
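/* Informal sketch of a container built below (the register choice is
   illustrative only): for struct { long l; double d; } passed in RDI
   and XMM0 the result resembles

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, each carrying its byte offset.  */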
5904 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5905 const_tree type, int in_return, int nintregs, int nsseregs,
5906 const int *intreg, int sse_regno)
5908 /* Static flags recording which errors have already been issued, so each is reported only once. */
5909 static bool issued_sse_arg_error;
5910 static bool issued_sse_ret_error;
5911 static bool issued_x87_ret_error;
5913 enum machine_mode tmpmode;
5915 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5916 enum x86_64_reg_class regclass[MAX_CLASSES];
5920 int needed_sseregs, needed_intregs;
5921 rtx exp[MAX_CLASSES];
5924 n = classify_argument (mode, type, regclass, 0);
5927 if (!examine_argument (mode, type, in_return, &needed_intregs,
5930 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5933 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5934 some less clueful developer tries to use floating-point anyway. */
5935 if (needed_sseregs && !TARGET_SSE)
5939 if (!issued_sse_ret_error)
5941 error ("SSE register return with SSE disabled");
5942 issued_sse_ret_error = true;
5945 else if (!issued_sse_arg_error)
5947 error ("SSE register argument with SSE disabled");
5948 issued_sse_arg_error = true;
5953 /* Likewise, error if the ABI requires us to return values in the
5954 x87 registers and the user specified -mno-80387. */
5955 if (!TARGET_80387 && in_return)
5956 for (i = 0; i < n; i++)
5957 if (regclass[i] == X86_64_X87_CLASS
5958 || regclass[i] == X86_64_X87UP_CLASS
5959 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5961 if (!issued_x87_ret_error)
5963 error ("x87 register return with x87 disabled");
5964 issued_x87_ret_error = true;
5969 /* First construct the simple cases.  Avoid SCmode, since we want to use a
5970 single register to pass this type. */
5971 if (n == 1 && mode != SCmode)
5972 switch (regclass[0])
5974 case X86_64_INTEGER_CLASS:
5975 case X86_64_INTEGERSI_CLASS:
5976 return gen_rtx_REG (mode, intreg[0]);
5977 case X86_64_SSE_CLASS:
5978 case X86_64_SSESF_CLASS:
5979 case X86_64_SSEDF_CLASS:
5980 if (mode != BLKmode)
5981 return gen_reg_or_parallel (mode, orig_mode,
5982 SSE_REGNO (sse_regno));
5984 case X86_64_X87_CLASS:
5985 case X86_64_COMPLEX_X87_CLASS:
5986 return gen_rtx_REG (mode, FIRST_STACK_REG);
5987 case X86_64_NO_CLASS:
5988 /* Zero-sized array, struct or class. */
5993 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5994 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5995 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5997 && regclass[0] == X86_64_SSE_CLASS
5998 && regclass[1] == X86_64_SSEUP_CLASS
5999 && regclass[2] == X86_64_SSEUP_CLASS
6000 && regclass[3] == X86_64_SSEUP_CLASS
6002 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6005 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6006 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6007 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6008 && regclass[1] == X86_64_INTEGER_CLASS
6009 && (mode == CDImode || mode == TImode || mode == TFmode)
6010 && intreg[0] + 1 == intreg[1])
6011 return gen_rtx_REG (mode, intreg[0]);
6013 /* Otherwise figure out the entries of the PARALLEL. */
6014 for (i = 0; i < n; i++)
6018 switch (regclass[i])
6020 case X86_64_NO_CLASS:
6022 case X86_64_INTEGER_CLASS:
6023 case X86_64_INTEGERSI_CLASS:
6024 /* Merge TImodes on aligned occasions here too. */
6025 if (i * 8 + 8 > bytes)
6026 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6027 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6031 /* We've requested 24 bytes for which we don't have a mode.  Use DImode. */
6032 if (tmpmode == BLKmode)
6034 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6035 gen_rtx_REG (tmpmode, *intreg),
6039 case X86_64_SSESF_CLASS:
6040 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6041 gen_rtx_REG (SFmode,
6042 SSE_REGNO (sse_regno)),
6046 case X86_64_SSEDF_CLASS:
6047 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6048 gen_rtx_REG (DFmode,
6049 SSE_REGNO (sse_regno)),
6053 case X86_64_SSE_CLASS:
6061 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6071 && regclass[1] == X86_64_SSEUP_CLASS
6072 && regclass[2] == X86_64_SSEUP_CLASS
6073 && regclass[3] == X86_64_SSEUP_CLASS);
6080 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6081 gen_rtx_REG (tmpmode,
6082 SSE_REGNO (sse_regno)),
6091 /* Empty aligned struct, union or class. */
6095 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6096 for (i = 0; i < nexps; i++)
6097 XVECEXP (ret, 0, i) = exp [i];
6101 /* Update the data in CUM to advance over an argument of mode MODE
6102 and data type TYPE. (TYPE is null for libcalls where that information
6103 may not be available.) */
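/* Informal example of the bookkeeping below: advancing over a DImode
   argument on ia32 with two register slots left subtracts 2 (words)
   from cum->nregs, bumps cum->regno by 2 and exhausts the integer
   registers, so subsequent arguments go on the stack.  */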
6106 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6107 const_tree type, HOST_WIDE_INT bytes,
6108 HOST_WIDE_INT words)
6124 cum->words += words;
6125 cum->nregs -= words;
6126 cum->regno += words;
6128 if (cum->nregs <= 0)
6136 /* OImode shouldn't be used directly. */
6140 if (cum->float_in_sse < 2)
6143 if (cum->float_in_sse < 1)
6160 if (!type || !AGGREGATE_TYPE_P (type))
6162 cum->sse_words += words;
6163 cum->sse_nregs -= 1;
6164 cum->sse_regno += 1;
6165 if (cum->sse_nregs <= 0)
6179 if (!type || !AGGREGATE_TYPE_P (type))
6181 cum->mmx_words += words;
6182 cum->mmx_nregs -= 1;
6183 cum->mmx_regno += 1;
6184 if (cum->mmx_nregs <= 0)
6195 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6196 const_tree type, HOST_WIDE_INT words, bool named)
6198 int int_nregs, sse_nregs;
6200 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
6201 if (!named && VALID_AVX256_REG_MODE (mode))
6204 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6205 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6207 cum->nregs -= int_nregs;
6208 cum->sse_nregs -= sse_nregs;
6209 cum->regno += int_nregs;
6210 cum->sse_regno += sse_nregs;
6214 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6215 cum->words = (cum->words + align - 1) & ~(align - 1);
6216 cum->words += words;
6221 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6222 HOST_WIDE_INT words)
6224 /* Otherwise, this should be passed indirectly. */
6225 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6227 cum->words += words;
6235 /* Update the data in CUM to advance over an argument of mode MODE and
6236 data type TYPE. (TYPE is null for libcalls where that information
6237 may not be available.) */
6240 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6241 const_tree type, bool named)
6243 HOST_WIDE_INT bytes, words;
6245 if (mode == BLKmode)
6246 bytes = int_size_in_bytes (type);
6248 bytes = GET_MODE_SIZE (mode);
6249 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6252 mode = type_natural_mode (type, NULL);
6254 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6255 function_arg_advance_ms_64 (cum, bytes, words);
6256 else if (TARGET_64BIT)
6257 function_arg_advance_64 (cum, mode, type, words, named);
6259 function_arg_advance_32 (cum, mode, type, bytes, words);
6262 /* Define where to put the arguments to a function.
6263 Value is zero to push the argument on the stack,
6264 or a hard register in which to store the argument.
6266 MODE is the argument's machine mode.
6267 TYPE is the data type of the argument (as a tree).
6268 This is null for libcalls where that information may
6270 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6271 the preceding args and about the function being called.
6272 NAMED is nonzero if this argument is a named parameter
6273 (otherwise it is an extra parameter matching an ellipsis). */
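/* Informal example of the 32-bit allocation handled below: with
   __attribute__((regparm(3))) the first three integer arguments travel
   in EAX, EDX and ECX; with fastcall the first two DWORD-or-smaller
   arguments go in ECX and EDX and EAX is skipped.  */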
6276 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6277 enum machine_mode orig_mode, const_tree type,
6278 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6280 static bool warnedsse, warnedmmx;
6282 /* Avoid the AL settings for the Unix64 ABI. */
6283 if (mode == VOIDmode)
6299 if (words <= cum->nregs)
6301 int regno = cum->regno;
6303 /* Fastcall allocates the first two DWORD (SImode) or
6304 smaller arguments to ECX and EDX if it isn't an
6305 aggregate type. */
6310 || (type && AGGREGATE_TYPE_P (type)))
6313 /* ECX, not EAX, is the first allocated register. */
6314 if (regno == AX_REG)
6317 return gen_rtx_REG (mode, regno);
6322 if (cum->float_in_sse < 2)
6325 if (cum->float_in_sse < 1)
6329 /* In 32-bit mode, we pass TImode in xmm registers. */
6336 if (!type || !AGGREGATE_TYPE_P (type))
6338 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6341 warning (0, "SSE vector argument without SSE enabled "
6345 return gen_reg_or_parallel (mode, orig_mode,
6346 cum->sse_regno + FIRST_SSE_REG);
6351 /* OImode shouldn't be used directly. */
6360 if (!type || !AGGREGATE_TYPE_P (type))
6363 return gen_reg_or_parallel (mode, orig_mode,
6364 cum->sse_regno + FIRST_SSE_REG);
6374 if (!type || !AGGREGATE_TYPE_P (type))
6376 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6379 warning (0, "MMX vector argument without MMX enabled "
6383 return gen_reg_or_parallel (mode, orig_mode,
6384 cum->mmx_regno + FIRST_MMX_REG);
6393 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6394 enum machine_mode orig_mode, const_tree type, bool named)
6396 /* Handle a hidden AL argument containing the number of registers
6397 for varargs x86-64 functions. */
6398 if (mode == VOIDmode)
6399 return GEN_INT (cum->maybe_vaarg
6400 ? (cum->sse_nregs < 0
6401 ? X86_64_SSE_REGPARM_MAX
6416 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
6422 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6424 &x86_64_int_parameter_registers [cum->regno],
6429 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6430 enum machine_mode orig_mode, bool named,
6431 HOST_WIDE_INT bytes)
6435 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6436 We use a value of -2 to specify that the current function call is MS ABI. */
6437 if (mode == VOIDmode)
6438 return GEN_INT (-2);
6440 /* If we've run out of registers, it goes on the stack. */
6441 if (cum->nregs == 0)
6444 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6446 /* Only floating point modes are passed in anything but integer regs. */
6447 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6450 regno = cum->regno + FIRST_SSE_REG;
6455 /* Unnamed floating parameters are passed in both the
6456 SSE and integer registers. */
6457 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6458 t2 = gen_rtx_REG (mode, regno);
6459 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6460 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6461 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6464 /* Handle aggregate types passed in registers. */
6465 if (orig_mode == BLKmode)
6467 if (bytes > 0 && bytes <= 8)
6468 mode = (bytes > 4 ? DImode : SImode);
6469 if (mode == BLKmode)
6473 return gen_reg_or_parallel (mode, orig_mode, regno);
6476 /* Return where to put the arguments to a function.
6477 Return zero to push the argument on the stack, or a hard register in
6478 which to store the argument.
6479 MODE is the argument's machine mode. TYPE is the data type of the
6480 argument. It is null for libcalls where that information may not be
6481 available. CUM gives information about the preceding args and about
6482 the function being called. NAMED is nonzero if this argument is a
6483 named parameter (otherwise it is an extra parameter matching an
6487 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6488 const_tree type, bool named)
6490 enum machine_mode mode = omode;
6491 HOST_WIDE_INT bytes, words;
6493 if (mode == BLKmode)
6494 bytes = int_size_in_bytes (type);
6496 bytes = GET_MODE_SIZE (mode);
6497 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6499 /* To simplify the code below, represent vector types with a vector mode
6500 even if MMX/SSE are not active. */
6501 if (type && TREE_CODE (type) == VECTOR_TYPE)
6502 mode = type_natural_mode (type, cum);
6504 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6505 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6506 else if (TARGET_64BIT)
6507 return function_arg_64 (cum, mode, omode, type, named);
6509 return function_arg_32 (cum, mode, omode, type, bytes, words);
6512 /* A C expression that indicates when an argument must be passed by
6513 reference. If nonzero for an argument, a copy of that argument is
6514 made in memory and a pointer to the argument is passed instead of
6515 the argument itself. The pointer is passed in whatever way is
6516 appropriate for passing a pointer to that type. */
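/* Informal examples of the Windows x64 rule checked below: aggregates
   of exactly 1, 2, 4 or 8 bytes are passed by value in a register; any
   other size, including a 16-byte __m128, is passed by reference.  */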
6519 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6520 enum machine_mode mode ATTRIBUTE_UNUSED,
6521 const_tree type, bool named ATTRIBUTE_UNUSED)
6523 /* See Windows x64 Software Convention. */
6524 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6526 int msize = (int) GET_MODE_SIZE (mode);
6529 /* Arrays are passed by reference. */
6530 if (TREE_CODE (type) == ARRAY_TYPE)
6533 if (AGGREGATE_TYPE_P (type))
6535 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6536 are passed by reference. */
6537 msize = int_size_in_bytes (type);
6541 /* __m128 is passed by reference. */
6543 case 1: case 2: case 4: case 8:
6549 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6555 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
6556 passing ABI. */
6558 contains_aligned_value_p (const_tree type)
6560 enum machine_mode mode = TYPE_MODE (type);
6561 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6565 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6567 if (TYPE_ALIGN (type) < 128)
6570 if (AGGREGATE_TYPE_P (type))
6572 /* Walk the aggregates recursively. */
6573 switch (TREE_CODE (type))
6577 case QUAL_UNION_TYPE:
6581 /* Walk all the structure fields. */
6582 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6584 if (TREE_CODE (field) == FIELD_DECL
6585 && contains_aligned_value_p (TREE_TYPE (field)))
6592 /* Just in case some language passes arrays by value. */
6593 if (contains_aligned_value_p (TREE_TYPE (type)))
6604 /* Gives the alignment boundary, in bits, of an argument with the
6605 specified mode and type. */
6608 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6613 /* Since the main variant type is used for the call, convert the
6614 type to its main variant. */
6615 type = TYPE_MAIN_VARIANT (type);
6616 align = TYPE_ALIGN (type);
6619 align = GET_MODE_ALIGNMENT (mode);
6620 if (align < PARM_BOUNDARY)
6621 align = PARM_BOUNDARY;
6622 /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
6623 natural boundaries. */
6624 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6626 /* The i386 ABI defines all arguments to be 4-byte aligned.  We have to
6627 make an exception for SSE modes since these require 128-bit
6628 alignment.
6630 The handling here differs from field_alignment.  ICC aligns MMX
6631 arguments to 4-byte boundaries, while structure fields are aligned
6632 to 8-byte boundaries. */
6635 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6636 align = PARM_BOUNDARY;
6640 if (!contains_aligned_value_p (type))
6641 align = PARM_BOUNDARY;
6644 if (align > BIGGEST_ALIGNMENT)
6645 align = BIGGEST_ALIGNMENT;
6649 /* Return true if REGNO is a possible register number for a function value. */
6652 ix86_function_value_regno_p (const unsigned int regno)
6659 case FIRST_FLOAT_REG:
6660 /* TODO: The function should depend on the current function's ABI, but
6661 builtins.c would need updating then.  Therefore we use the
6662 default ABI. */
6663 if (TARGET_64BIT && ix86_abi == MS_ABI)
6665 return TARGET_FLOAT_RETURNS_IN_80387;
6671 if (TARGET_MACHO || TARGET_64BIT)
6679 /* Define how to find the value returned by a function.
6680 VALTYPE is the data type of the value (as a tree).
6681 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6682 otherwise, FUNC is 0. */
6685 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6686 const_tree fntype, const_tree fn)
6690 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6691 we normally prevent this case when mmx is not available. However
6692 some ABIs may require the result to be returned like DImode. */
6693 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6694 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6696 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6697 we prevent this case when sse is not available. However some ABIs
6698 may require the result to be returned like integer TImode. */
6699 else if (mode == TImode
6700 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6701 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6703 /* 32-byte vector modes in %ymm0. */
6704 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6705 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6707 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6708 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6709 regno = FIRST_FLOAT_REG;
6711 /* Most things go in %eax. */
6714 /* Override FP return register with %xmm0 for local functions when
6715 SSE math is enabled or for functions with sseregparm attribute. */
6716 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6718 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6719 if ((sse_level >= 1 && mode == SFmode)
6720 || (sse_level == 2 && mode == DFmode))
6721 regno = FIRST_SSE_REG;
6724 /* OImode shouldn't be used directly. */
6725 gcc_assert (mode != OImode);
6727 return gen_rtx_REG (orig_mode, regno);
6731 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6736 /* Handle libcalls, which don't provide a type node. */
6737 if (valtype == NULL)
6749 return gen_rtx_REG (mode, FIRST_SSE_REG);
6752 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6756 return gen_rtx_REG (mode, AX_REG);
6760 ret = construct_container (mode, orig_mode, valtype, 1,
6761 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6762 x86_64_int_return_registers, 0);
6764 /* For zero-sized structures, construct_container returns NULL, but we
6765 need to keep the rest of the compiler happy by returning a meaningful value. */
6767 ret = gen_rtx_REG (orig_mode, AX_REG);
6773 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6775 unsigned int regno = AX_REG;
6779 switch (GET_MODE_SIZE (mode))
6782 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6783 && !COMPLEX_MODE_P (mode))
6784 regno = FIRST_SSE_REG;
6788 if (mode == SFmode || mode == DFmode)
6789 regno = FIRST_SSE_REG;
6795 return gen_rtx_REG (orig_mode, regno);
6799 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6800 enum machine_mode orig_mode, enum machine_mode mode)
6802 const_tree fn, fntype;
6805 if (fntype_or_decl && DECL_P (fntype_or_decl))
6806 fn = fntype_or_decl;
6807 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6809 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6810 return function_value_ms_64 (orig_mode, mode);
6811 else if (TARGET_64BIT)
6812 return function_value_64 (orig_mode, mode, valtype);
6814 return function_value_32 (orig_mode, mode, fntype, fn);
6818 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6819 bool outgoing ATTRIBUTE_UNUSED)
6821 enum machine_mode mode, orig_mode;
6823 orig_mode = TYPE_MODE (valtype);
6824 mode = type_natural_mode (valtype, NULL);
6825 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6829 ix86_libcall_value (enum machine_mode mode)
6831 return ix86_function_value_1 (NULL, NULL, mode, mode);
6834 /* Return true iff type is returned in memory. */
6836 static bool ATTRIBUTE_UNUSED
6837 return_in_memory_32 (const_tree type, enum machine_mode mode)
6841 if (mode == BLKmode)
6844 size = int_size_in_bytes (type);
6846 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6849 if (VECTOR_MODE_P (mode) || mode == TImode)
6851 /* User-created vectors small enough to fit in EAX. */
6855 /* MMX/3dNow values are returned in MM0,
6856 except when it doesn't exist or the ABI prescribes otherwise. */
6858 return !TARGET_MMX || TARGET_VECT8_RETURNS;
6860 /* SSE values are returned in XMM0, except when it doesn't exist. */
6864 /* AVX values are returned in YMM0, except when it doesn't exist. */
6875 /* OImode shouldn't be used directly. */
6876 gcc_assert (mode != OImode);
6881 static bool ATTRIBUTE_UNUSED
6882 return_in_memory_64 (const_tree type, enum machine_mode mode)
6884 int needed_intregs, needed_sseregs;
6885 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6888 static bool ATTRIBUTE_UNUSED
6889 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6891 HOST_WIDE_INT size = int_size_in_bytes (type);
6893 /* __m128 is returned in xmm0. */
6894 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6895 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6898 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6899 return size != 1 && size != 2 && size != 4 && size != 8;
6903 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6905 #ifdef SUBTARGET_RETURN_IN_MEMORY
6906 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6908 const enum machine_mode mode = type_natural_mode (type, NULL);
6912 if (ix86_function_type_abi (fntype) == MS_ABI)
6913 return return_in_memory_ms_64 (type, mode);
6915 return return_in_memory_64 (type, mode);
6918 return return_in_memory_32 (type, mode);
6922 /* When returning SSE vector types, we have a choice of either
6923 (1) being ABI-incompatible with a -march switch, or
6924 (2) generating an error.
6925 Given no good solution, I think the safest thing is one warning.
6926 The user won't be able to use -Werror, but....
6928 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6929 called in response to actually generating a caller or callee that
6930 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6931 via aggregate_value_p for general type probing from tree-ssa. */
6934 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6936 static bool warnedsse, warnedmmx;
6938 if (!TARGET_64BIT && type)
6940 /* Look at the return type of the function, not the function type. */
6941 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6943 if (!TARGET_SSE && !warnedsse)
6946 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6949 warning (0, "SSE vector return without SSE enabled "
6954 if (!TARGET_MMX && !warnedmmx)
6956 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6959 warning (0, "MMX vector return without MMX enabled "
6969 /* Create the va_list data type. */
6971 /* Return the calling-convention-specific va_list data type.
6972 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
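/* For reference, the record built below matches the familiar SysV
   x86-64 va_list layout, shown here as C for illustration:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */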
6975 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6977 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6979 /* For i386 we use a plain pointer to the argument area. */
6980 if (!TARGET_64BIT || abi == MS_ABI)
6981 return build_pointer_type (char_type_node);
6983 record = lang_hooks.types.make_type (RECORD_TYPE);
6984 type_decl = build_decl (BUILTINS_LOCATION,
6985 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6987 f_gpr = build_decl (BUILTINS_LOCATION,
6988 FIELD_DECL, get_identifier ("gp_offset"),
6989 unsigned_type_node);
6990 f_fpr = build_decl (BUILTINS_LOCATION,
6991 FIELD_DECL, get_identifier ("fp_offset"),
6992 unsigned_type_node);
6993 f_ovf = build_decl (BUILTINS_LOCATION,
6994 FIELD_DECL, get_identifier ("overflow_arg_area"),
6996 f_sav = build_decl (BUILTINS_LOCATION,
6997 FIELD_DECL, get_identifier ("reg_save_area"),
7000 va_list_gpr_counter_field = f_gpr;
7001 va_list_fpr_counter_field = f_fpr;
7003 DECL_FIELD_CONTEXT (f_gpr) = record;
7004 DECL_FIELD_CONTEXT (f_fpr) = record;
7005 DECL_FIELD_CONTEXT (f_ovf) = record;
7006 DECL_FIELD_CONTEXT (f_sav) = record;
7008 TREE_CHAIN (record) = type_decl;
7009 TYPE_NAME (record) = type_decl;
7010 TYPE_FIELDS (record) = f_gpr;
7011 DECL_CHAIN (f_gpr) = f_fpr;
7012 DECL_CHAIN (f_fpr) = f_ovf;
7013 DECL_CHAIN (f_ovf) = f_sav;
7015 layout_type (record);
7017 /* The correct type is an array type of one element. */
7018 return build_array_type (record, build_index_type (size_zero_node));
7021 /* Set up the builtin va_list data type and, for 64-bit, the additional
7022 calling-convention-specific va_list data types. */
7025 ix86_build_builtin_va_list (void)
7027 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7029 /* Initialize the ABI-specific va_list builtin types. */
7033 if (ix86_abi == MS_ABI)
7035 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7036 if (TREE_CODE (t) != RECORD_TYPE)
7037 t = build_variant_type_copy (t);
7038 sysv_va_list_type_node = t;
7043 if (TREE_CODE (t) != RECORD_TYPE)
7044 t = build_variant_type_copy (t);
7045 sysv_va_list_type_node = t;
7047 if (ix86_abi != MS_ABI)
7049 t = ix86_build_builtin_va_list_abi (MS_ABI);
7050 if (TREE_CODE (t) != RECORD_TYPE)
7051 t = build_variant_type_copy (t);
7052 ms_va_list_type_node = t;
7057 if (TREE_CODE (t) != RECORD_TYPE)
7058 t = build_variant_type_copy (t);
7059 ms_va_list_type_node = t;
7066 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7069 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7075 /* GPR size of varargs save area. */
7076 if (cfun->va_list_gpr_size)
7077 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7079 ix86_varargs_gpr_size = 0;
7081 /* FPR size of varargs save area. We don't need it if we don't pass
7082 anything in SSE registers. */
7083 if (TARGET_SSE && cfun->va_list_fpr_size)
7084 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7086 ix86_varargs_fpr_size = 0;
7088 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7091 save_area = frame_pointer_rtx;
7092 set = get_varargs_alias_set ();
7094 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7095 if (max > X86_64_REGPARM_MAX)
7096 max = X86_64_REGPARM_MAX;
7098 for (i = cum->regno; i < max; i++)
7100 mem = gen_rtx_MEM (Pmode,
7101 plus_constant (save_area, i * UNITS_PER_WORD));
7102 MEM_NOTRAP_P (mem) = 1;
7103 set_mem_alias_set (mem, set);
7104 emit_move_insn (mem, gen_rtx_REG (Pmode,
7105 x86_64_int_parameter_registers[i]));
7108 if (ix86_varargs_fpr_size)
7110 enum machine_mode smode;
7113 /* Now emit code to save SSE registers.  The AX parameter contains the
7114 number of SSE parameter registers used to call this function, though
7115 all we actually check here is the zero/non-zero status. */
7117 label = gen_label_rtx ();
7118 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7119 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7122 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7123 we used movdqa (i.e. TImode) instead? Perhaps even better would
7124 be if we could determine the real mode of the data, via a hook
7125 into pass_stdarg. Ignore all that for now. */
7127 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7128 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7130 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7131 if (max > X86_64_SSE_REGPARM_MAX)
7132 max = X86_64_SSE_REGPARM_MAX;
7134 for (i = cum->sse_regno; i < max; ++i)
7136 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7137 mem = gen_rtx_MEM (smode, mem);
7138 MEM_NOTRAP_P (mem) = 1;
7139 set_mem_alias_set (mem, set);
7140 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7142 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7150 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7152 alias_set_type set = get_varargs_alias_set ();
7155 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7159 mem = gen_rtx_MEM (Pmode,
7160 plus_constant (virtual_incoming_args_rtx,
7161 i * UNITS_PER_WORD));
7162 MEM_NOTRAP_P (mem) = 1;
7163 set_mem_alias_set (mem, set);
7165 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7166 emit_move_insn (mem, reg);
7171 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7172 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7175 CUMULATIVE_ARGS next_cum;
7178 /* This argument doesn't appear to be used anymore, which is good,
7179 because the old code here didn't suppress rtl generation. */
7180 gcc_assert (!no_rtl);
7185 fntype = TREE_TYPE (current_function_decl);
7187 /* For varargs, we do not want to skip the dummy va_dcl argument.
7188 For stdargs, we do want to skip the last named argument. */
7190 if (stdarg_p (fntype))
7191 ix86_function_arg_advance (&next_cum, mode, type, true);
7193 if (cum->call_abi == MS_ABI)
7194 setup_incoming_varargs_ms_64 (&next_cum);
7196 setup_incoming_varargs_64 (&next_cum);
7199 /* Return true if TYPE is a va_list of the plain char * kind. */
7202 is_va_list_char_pointer (tree type)
7206 /* For 32-bit it is always true. */
7209 canonic = ix86_canonical_va_list_type (type);
7210 return (canonic == ms_va_list_type_node
7211 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7214 /* Implement va_start. */
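/* Informal sketch of what the expansion below stores, for a function
   that consumed n_gpr integer and n_fpr SSE registers before the
   ellipsis:

     gp_offset = n_gpr * 8;
     fp_offset = X86_64_REGPARM_MAX * 8 + n_fpr * 16;
     overflow_arg_area = <incoming stack args> + words * UNITS_PER_WORD;
     reg_save_area = <register save area in the frame>;  */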
7217 ix86_va_start (tree valist, rtx nextarg)
7219 HOST_WIDE_INT words, n_gpr, n_fpr;
7220 tree f_gpr, f_fpr, f_ovf, f_sav;
7221 tree gpr, fpr, ovf, sav, t;
7226 if (flag_split_stack
7227 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7229 unsigned int scratch_regno;
7231 /* When we are splitting the stack, we can't refer to the stack
7232 arguments using internal_arg_pointer, because they may be on
7233 the old stack. The split stack prologue will arrange to
7234 leave a pointer to the old stack arguments in a scratch
7235 register, which we here copy to a pseudo-register. The split
7236 stack prologue can't set the pseudo-register directly because
7237 it (the prologue) runs before any registers have been saved. */
7239 scratch_regno = split_stack_prologue_scratch_regno ();
7240 if (scratch_regno != INVALID_REGNUM)
7244 reg = gen_reg_rtx (Pmode);
7245 cfun->machine->split_stack_varargs_pointer = reg;
7248 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7252 push_topmost_sequence ();
7253 emit_insn_after (seq, entry_of_function ());
7254 pop_topmost_sequence ();
7258 /* Only the 64-bit target needs something special. */
7259 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7261 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7262 std_expand_builtin_va_start (valist, nextarg);
7267 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7268 next = expand_binop (ptr_mode, add_optab,
7269 cfun->machine->split_stack_varargs_pointer,
7270 crtl->args.arg_offset_rtx,
7271 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7272 convert_move (va_r, next, 0);
7277 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7278 f_fpr = DECL_CHAIN (f_gpr);
7279 f_ovf = DECL_CHAIN (f_fpr);
7280 f_sav = DECL_CHAIN (f_ovf);
7282 valist = build_simple_mem_ref (valist);
7283 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7284 /* The following should be folded into the MEM_REF offset. */
7285 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7287 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7289 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7291 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7294 /* Count the number of gp and fp argument registers used. */
7295 words = crtl->args.info.words;
7296 n_gpr = crtl->args.info.regno;
7297 n_fpr = crtl->args.info.sse_regno;
7299 if (cfun->va_list_gpr_size)
7301 type = TREE_TYPE (gpr);
7302 t = build2 (MODIFY_EXPR, type,
7303 gpr, build_int_cst (type, n_gpr * 8));
7304 TREE_SIDE_EFFECTS (t) = 1;
7305 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7308 if (TARGET_SSE && cfun->va_list_fpr_size)
7310 type = TREE_TYPE (fpr);
7311 t = build2 (MODIFY_EXPR, type, fpr,
7312 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7313 TREE_SIDE_EFFECTS (t) = 1;
7314 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7317 /* Find the overflow area. */
7318 type = TREE_TYPE (ovf);
7319 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7320 ovf_rtx = crtl->args.internal_arg_pointer;
7322 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7323 t = make_tree (type, ovf_rtx);
7325 t = build2 (POINTER_PLUS_EXPR, type, t,
7326 size_int (words * UNITS_PER_WORD));
7327 t = build2 (MODIFY_EXPR, type, ovf, t);
7328 TREE_SIDE_EFFECTS (t) = 1;
7329 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7331 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7333 /* Find the register save area.
7334 The prologue of the function saves it right above the stack frame. */
7335 type = TREE_TYPE (sav);
7336 t = make_tree (type, frame_pointer_rtx);
7337 if (!ix86_varargs_gpr_size)
7338 t = build2 (POINTER_PLUS_EXPR, type, t,
7339 size_int (-8 * X86_64_REGPARM_MAX));
7340 t = build2 (MODIFY_EXPR, type, sav, t);
7341 TREE_SIDE_EFFECTS (t) = 1;
7342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7346 /* Implement va_arg. */
7349 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7352 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7353 tree f_gpr, f_fpr, f_ovf, f_sav;
7354 tree gpr, fpr, ovf, sav, t;
7356 tree lab_false, lab_over = NULL_TREE;
7361 enum machine_mode nat_mode;
7362 unsigned int arg_boundary;
7364 /* Only the 64-bit target needs something special. */
7365 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7366 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7368 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7369 f_fpr = DECL_CHAIN (f_gpr);
7370 f_ovf = DECL_CHAIN (f_fpr);
7371 f_sav = DECL_CHAIN (f_ovf);
7373 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7374 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7375 valist = build_va_arg_indirect_ref (valist);
7376 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7377 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7378 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7380 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7382 type = build_pointer_type (type);
7383 size = int_size_in_bytes (type);
7384 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7386 nat_mode = type_natural_mode (type, NULL);
7395 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
7396 if (ix86_cfun_abi () == SYSV_ABI)
7403 container = construct_container (nat_mode, TYPE_MODE (type),
7404 type, 0, X86_64_REGPARM_MAX,
7405 X86_64_SSE_REGPARM_MAX, intreg,
7410 /* Pull the value out of the saved registers. */
7412 addr = create_tmp_var (ptr_type_node, "addr");
7416 int needed_intregs, needed_sseregs;
7418 tree int_addr, sse_addr;
7420 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7421 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7423 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7425 need_temp = (!REG_P (container)
7426 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7427 || TYPE_ALIGN (type) > 128));
7429 /* If we are passing a structure, verify that it is a consecutive block
7430 in the register save area.  If not, we need to do moves. */
7431 if (!need_temp && !REG_P (container))
7433 /* Verify that all registers are strictly consecutive. */
7434 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7438 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7440 rtx slot = XVECEXP (container, 0, i);
7441 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7442 || INTVAL (XEXP (slot, 1)) != i * 16)
7450 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7452 rtx slot = XVECEXP (container, 0, i);
7453 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7454 || INTVAL (XEXP (slot, 1)) != i * 8)
7466 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7467 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7470 /* First ensure that we fit completely in registers. */
7473 t = build_int_cst (TREE_TYPE (gpr),
7474 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7475 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7476 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7477 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7478 gimplify_and_add (t, pre_p);
7482 t = build_int_cst (TREE_TYPE (fpr),
7483 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7484 + X86_64_REGPARM_MAX * 8);
7485 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7486 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7487 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7488 gimplify_and_add (t, pre_p);
7491 /* Compute index to start of area used for integer regs. */
7494 /* int_addr = gpr + sav; */
7495 t = fold_convert (sizetype, gpr);
7496 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7497 gimplify_assign (int_addr, t, pre_p);
7501 /* sse_addr = fpr + sav; */
7502 t = fold_convert (sizetype, fpr);
7503 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7504 gimplify_assign (sse_addr, t, pre_p);
7508 int i, prev_size = 0;
7509 tree temp = create_tmp_var (type, "va_arg_tmp");
7512 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7513 gimplify_assign (addr, t, pre_p);
7515 for (i = 0; i < XVECLEN (container, 0); i++)
7517 rtx slot = XVECEXP (container, 0, i);
7518 rtx reg = XEXP (slot, 0);
7519 enum machine_mode mode = GET_MODE (reg);
7525 tree dest_addr, dest;
7526 int cur_size = GET_MODE_SIZE (mode);
7528 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7529 prev_size = INTVAL (XEXP (slot, 1));
7530 if (prev_size + cur_size > size)
7532 cur_size = size - prev_size;
7533 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7534 if (mode == BLKmode)
7537 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7538 if (mode == GET_MODE (reg))
7539 addr_type = build_pointer_type (piece_type);
7541 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7543 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7546 if (SSE_REGNO_P (REGNO (reg)))
7548 src_addr = sse_addr;
7549 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7553 src_addr = int_addr;
7554 src_offset = REGNO (reg) * 8;
7556 src_addr = fold_convert (addr_type, src_addr);
7557 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7558 size_int (src_offset));
7560 dest_addr = fold_convert (daddr_type, addr);
7561 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7562 size_int (prev_size));
7563 if (cur_size == GET_MODE_SIZE (mode))
7565 src = build_va_arg_indirect_ref (src_addr);
7566 dest = build_va_arg_indirect_ref (dest_addr);
7568 gimplify_assign (dest, src, pre_p);
7573 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7574 3, dest_addr, src_addr,
7575 size_int (cur_size));
7576 gimplify_and_add (copy, pre_p);
7578 prev_size += cur_size;
7584 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7585 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7586 gimplify_assign (gpr, t, pre_p);
7591 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7592 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7593 gimplify_assign (fpr, t, pre_p);
7596 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7598 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7601 /* ... otherwise out of the overflow area. */
7603 /* When we align a parameter on the stack for the caller, if its
7604 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7605 aligned to MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
7606 with the caller here. */
7607 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7608 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7609 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7611 /* Care for on-stack alignment if needed. */
7612 if (arg_boundary <= 64 || size == 0)
7616 HOST_WIDE_INT align = arg_boundary / 8;
7617 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7618 size_int (align - 1));
7619 t = fold_convert (sizetype, t);
7620 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7622 t = fold_convert (TREE_TYPE (ovf), t);
7625 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7626 gimplify_assign (addr, t, pre_p);
7628 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7629 size_int (rsize * UNITS_PER_WORD));
7630 gimplify_assign (unshare_expr (ovf), t, pre_p);
7633 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7635 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7636 addr = fold_convert (ptrtype, addr);
7639 addr = build_va_arg_indirect_ref (addr);
7640 return build_va_arg_indirect_ref (addr);
7643 /* Return true if OPNUM's MEM should be matched
7644 in movabs* patterns. */
7647 ix86_check_movabs (rtx insn, int opnum)
7651 set = PATTERN (insn);
7652 if (GET_CODE (set) == PARALLEL)
7653 set = XVECEXP (set, 0, 0);
7654 gcc_assert (GET_CODE (set) == SET);
7655 mem = XEXP (set, opnum);
7656 while (GET_CODE (mem) == SUBREG)
7657 mem = SUBREG_REG (mem);
7658 gcc_assert (MEM_P (mem));
7659 return volatile_ok || !MEM_VOLATILE_P (mem);
7662 /* Initialize the table of extra 80387 mathematical constants. */
7665 init_ext_80387_constants (void)
7667 static const char * cst[5] =
7669 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7670 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7671 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7672 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7673 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7677 for (i = 0; i < 5; i++)
7679 real_from_string (&ext_80387_constants_table[i], cst[i]);
7680 /* Ensure each constant is rounded to XFmode precision. */
7681 real_convert (&ext_80387_constants_table[i],
7682 XFmode, &ext_80387_constants_table[i]);
7685 ext_80387_constants_init = 1;
7688 /* Return non-zero if the constant is something that
7689 can be loaded with a special instruction. */
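/* Informal map of the nonzero return values (see also
   standard_80387_constant_opcode below): 1 -> fldz, 2 -> fld1,
   3..7 -> fldlg2/fldln2/fldl2e/fldl2t/fldpi, 8 -> fldz;fchs (-0.0),
   9 -> fld1;fchs (-1.0).  */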
7692 standard_80387_constant_p (rtx x)
7694 enum machine_mode mode = GET_MODE (x);
7698 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7701 if (x == CONST0_RTX (mode))
7703 if (x == CONST1_RTX (mode))
7706 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7708 /* For XFmode constants, try to find a special 80387 instruction when
7709 optimizing for size or on those CPUs that benefit from them. */
7711 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7715 if (! ext_80387_constants_init)
7716 init_ext_80387_constants ();
7718 for (i = 0; i < 5; i++)
7719 if (real_identical (&r, &ext_80387_constants_table[i]))
7723 /* A load of the constant -0.0 or -1.0 will be split into an
7724 fldz;fchs or fld1;fchs sequence. */
7725 if (real_isnegzero (&r))
7727 if (real_identical (&r, &dconstm1))
7733 /* Return the opcode of the special instruction to be used to load
7734 the constant X. */
7737 standard_80387_constant_opcode (rtx x)
7739 switch (standard_80387_constant_p (x))
7763 /* Return the CONST_DOUBLE representing the 80387 constant that is
7764 loaded by the specified special instruction. The argument IDX
7765 matches the return value from standard_80387_constant_p. */
7768 standard_80387_constant_rtx (int idx)
7772 if (! ext_80387_constants_init)
7773 init_ext_80387_constants ();
7789 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7793 /* Return 1 if X is all 0s and 2 if X is all 1s
7794 in a supported SSE vector mode. */
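/* Informally: a return of 1 is materialized with an xorps/pxor of the
   destination with itself, and a return of 2 with pcmpeqd of the
   destination with itself, which sets all bits; see
   standard_sse_constant_opcode below.  */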
7797 standard_sse_constant_p (rtx x)
7799 enum machine_mode mode = GET_MODE (x);
7801 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7803 if (vector_all_ones_operand (x, mode))
7819 /* Return the opcode of the special instruction to be used to load
7820 the constant X. */
7823 standard_sse_constant_opcode (rtx insn, rtx x)
7825 switch (standard_sse_constant_p (x))
7828 switch (get_attr_mode (insn))
7831 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7833 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7834 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7836 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7838 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7839 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7841 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7843 return "vxorps\t%x0, %x0, %x0";
7845 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7846 return "vxorps\t%x0, %x0, %x0";
7848 return "vxorpd\t%x0, %x0, %x0";
7850 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7851 return "vxorps\t%x0, %x0, %x0";
7853 return "vpxor\t%x0, %x0, %x0";
7858 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7865 /* Return true if OP contains a symbol reference. */
7868 symbolic_reference_mentioned_p (rtx op)
7873 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7876 fmt = GET_RTX_FORMAT (GET_CODE (op));
7877 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7883 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7884 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7888 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7895 /* Return true if it is appropriate to emit `ret' instructions in the
7896 body of a function. Do this only if the epilogue is simple, needing a
7897 couple of insns. Prior to reloading, we can't tell how many registers
7898 must be saved, so return false then. Return false if there is no frame
7899 marker to de-allocate. */
7902 ix86_can_use_return_insn_p (void)
7904 struct ix86_frame frame;
7906 if (! reload_completed || frame_pointer_needed)
7909 /* Don't allow more than 32k pop, since that's all we can do
7910 with one instruction. */
7911 if (crtl->args.pops_args && crtl->args.size >= 32768)
7914 ix86_compute_frame_layout (&frame);
7915 return (frame.stack_pointer_offset == UNITS_PER_WORD
7916 && (frame.nregs + frame.nsseregs) == 0);
7919 /* Value should be nonzero if functions must have frame pointers.
7920 Zero means the frame pointer need not be set up (and parms may
7921 be accessed via the stack pointer) in functions that seem suitable. */
7924 ix86_frame_pointer_required (void)
7926 /* If we accessed previous frames, then the generated code expects
7927 to be able to access the saved ebp value in our frame. */
7928 if (cfun->machine->accesses_prev_frame)
7931 /* Several x86 OSes need a frame pointer for other reasons,
7932 usually pertaining to setjmp. */
7933 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7936 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7937 turns off the frame pointer by default. Turn it back on now if
7938 we've not got a leaf function. */
7939 if (TARGET_OMIT_LEAF_FRAME_POINTER
7940 && (!current_function_is_leaf
7941 || ix86_current_function_calls_tls_descriptor))
7944 if (crtl->profile && !flag_fentry)
7950 /* Record that the current function accesses previous call frames. */
7953 ix86_setup_frame_addresses (void)
7955 cfun->machine->accesses_prev_frame = 1;
7958 #ifndef USE_HIDDEN_LINKONCE
7959 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7960 # define USE_HIDDEN_LINKONCE 1
7962 # define USE_HIDDEN_LINKONCE 0
7966 static int pic_labels_used;
7968 /* Fills in the label name that should be used for a pc thunk for
7969 the given register. */
7972 get_pc_thunk_name (char name[32], unsigned int regno)
7974 gcc_assert (!TARGET_64BIT);
7976 if (USE_HIDDEN_LINKONCE)
7977 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7979 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7983 /* This function generates code for -fpic that loads %ebx with
7984 the return address of the caller and then returns. */
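/* For reference, the thunk emitted below for %ebx looks roughly like
   this (label name and padding vary by target):

     __i686.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret
   */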
7987 ix86_code_end (void)
7992 for (regno = AX_REG; regno <= SP_REG; regno++)
7997 if (!(pic_labels_used & (1 << regno)))
8000 get_pc_thunk_name (name, regno);
8002 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8003 get_identifier (name),
8004 build_function_type (void_type_node, void_list_node));
8005 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8006 NULL_TREE, void_type_node);
8007 TREE_PUBLIC (decl) = 1;
8008 TREE_STATIC (decl) = 1;
8013 switch_to_section (darwin_sections[text_coal_section]);
8014 fputs ("\t.weak_definition\t", asm_out_file);
8015 assemble_name (asm_out_file, name);
8016 fputs ("\n\t.private_extern\t", asm_out_file);
8017 assemble_name (asm_out_file, name);
8018 putc ('\n', asm_out_file);
8019 ASM_OUTPUT_LABEL (asm_out_file, name);
8020 DECL_WEAK (decl) = 1;
8024 if (USE_HIDDEN_LINKONCE)
8026 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8028 targetm.asm_out.unique_section (decl, 0);
8029 switch_to_section (get_named_section (decl, NULL, 0));
8031 targetm.asm_out.globalize_label (asm_out_file, name);
8032 fputs ("\t.hidden\t", asm_out_file);
8033 assemble_name (asm_out_file, name);
8034 putc ('\n', asm_out_file);
8035 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8039 switch_to_section (text_section);
8040 ASM_OUTPUT_LABEL (asm_out_file, name);
8043 DECL_INITIAL (decl) = make_node (BLOCK);
8044 current_function_decl = decl;
8045 init_function_start (decl);
8046 first_function_block_is_cold = false;
8047 /* Make sure unwind info is emitted for the thunk if needed. */
8048 final_start_function (emit_barrier (), asm_out_file, 1);
8050 /* Pad stack IP move with 4 instructions (two NOPs count
8051 as one instruction). */
8052 if (TARGET_PAD_SHORT_FUNCTION)
8057 fputs ("\tnop\n", asm_out_file);
8060 xops[0] = gen_rtx_REG (Pmode, regno);
8061 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8062 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8063 fputs ("\tret\n", asm_out_file);
8064 final_end_function ();
8065 init_insn_lengths ();
8066 free_after_compilation (cfun);
8068 current_function_decl = NULL;
8071 if (flag_split_stack)
8072 file_end_indicate_split_stack ();
8075 /* Emit code for the SET_GOT patterns. */
8078 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8084 if (TARGET_VXWORKS_RTP && flag_pic)
8086 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8087 xops[2] = gen_rtx_MEM (Pmode,
8088 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8089 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8091 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8092 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8093 an unadorned address. */
8094 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8095 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8096 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8100 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8102 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8104 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8107 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8110 output_asm_insn ("call\t%a2", xops);
8111 #ifdef DWARF2_UNWIND_INFO
8112 /* The call to the next label acts as a push. */
8113 if (dwarf2out_do_frame ())
8117 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8118 gen_rtx_PLUS (Pmode,
8121 RTX_FRAME_RELATED_P (insn) = 1;
8122 dwarf2out_frame_debug (insn, true);
8129 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8130 is what will be referenced by the Mach-O PIC subsystem. */
8132 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8135 targetm.asm_out.internal_label (asm_out_file, "L",
8136 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8140 output_asm_insn ("pop%z0\t%0", xops);
8141 #ifdef DWARF2_UNWIND_INFO
8142 /* The pop clobbers DEST, but for unwind info purposes it does
8143 not count as restoring it. */
8144 if (dwarf2out_do_frame ())
8148 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8149 dwarf2out_frame_debug (insn, true);
8150 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8151 gen_rtx_PLUS (Pmode,
8154 RTX_FRAME_RELATED_P (insn) = 1;
8155 dwarf2out_frame_debug (insn, true);
8164 get_pc_thunk_name (name, REGNO (dest));
8165 pic_labels_used |= 1 << REGNO (dest);
8167 #ifdef DWARF2_UNWIND_INFO
8168 /* Ensure all queued register saves are flushed before the call. */
8170 if (dwarf2out_do_frame ())
8171 dwarf2out_flush_queued_reg_saves ();
8173 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8174 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8175 output_asm_insn ("call\t%X2", xops);
8176 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8177 is what will be referenced by the Mach-O PIC subsystem. */
8180 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8182 targetm.asm_out.internal_label (asm_out_file, "L",
8183 CODE_LABEL_NUMBER (label));
8190 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8191 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8193 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
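/* Taken together, the non-deep-branch-prediction case above emits the
   classic i386 PIC sequence, e.g. for %ebx:
	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */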
8198 /* Generate a "push" pattern for input ARG. */
8203 struct machine_function *m = cfun->machine;
8205 if (m->fs.cfa_reg == stack_pointer_rtx)
8206 m->fs.cfa_offset += UNITS_PER_WORD;
8207 m->fs.sp_offset += UNITS_PER_WORD;
8209 return gen_rtx_SET (VOIDmode,
8211 gen_rtx_PRE_DEC (Pmode,
8212 stack_pointer_rtx)),
8216 /* Generate a "pop" pattern for input ARG. */
8221 return gen_rtx_SET (VOIDmode,
8224 gen_rtx_POST_INC (Pmode,
8225 stack_pointer_rtx)));
8228 /* Return >= 0 if there is an unused call-clobbered register available
8229 for the entire function. */
8232 ix86_select_alt_pic_regnum (void)
8234 if (current_function_is_leaf
8236 && !ix86_current_function_calls_tls_descriptor)
8239 /* Can't use the same register for both PIC and DRAP. */
8241 drap = REGNO (crtl->drap_reg);
8244 for (i = 2; i >= 0; --i)
8245 if (i != drap && !df_regs_ever_live_p (i))
8249 return INVALID_REGNUM;
8252 /* Return 1 if we need to save REGNO. */
8254 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8256 if (pic_offset_table_rtx
8257 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8258 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8260 || crtl->calls_eh_return
8261 || crtl->uses_const_pool))
8263 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8268 if (crtl->calls_eh_return && maybe_eh_return)
8273 unsigned test = EH_RETURN_DATA_REGNO (i);
8274 if (test == INVALID_REGNUM)
8281 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8284 return (df_regs_ever_live_p (regno)
8285 && !call_used_regs[regno]
8286 && !fixed_regs[regno]
8287 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8290 /* Return the number of saved general purpose registers. */
8293 ix86_nsaved_regs (void)
8298 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8299 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8304 /* Return the number of saved SSE registers. */
8307 ix86_nsaved_sseregs (void)
8312 if (ix86_cfun_abi () != MS_ABI)
8314 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8315 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8320 /* Given FROM and TO register numbers, say whether this elimination is
8321 allowed. If stack alignment is needed, we can only replace argument
8322 pointer with hard frame pointer, or replace frame pointer with stack
8323 pointer. Otherwise, frame pointer elimination is automatically
8324 handled and all other eliminations are valid. */
8327 ix86_can_eliminate (const int from, const int to)
8329 if (stack_realign_fp)
8330 return ((from == ARG_POINTER_REGNUM
8331 && to == HARD_FRAME_POINTER_REGNUM)
8332 || (from == FRAME_POINTER_REGNUM
8333 && to == STACK_POINTER_REGNUM));
8335 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8338 /* Return the offset between two registers, one to be eliminated, and the other
8339 its replacement, at the start of a routine. */
8342 ix86_initial_elimination_offset (int from, int to)
8344 struct ix86_frame frame;
8345 ix86_compute_frame_layout (&frame);
8347 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8348 return frame.hard_frame_pointer_offset;
8349 else if (from == FRAME_POINTER_REGNUM
8350 && to == HARD_FRAME_POINTER_REGNUM)
8351 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8354 gcc_assert (to == STACK_POINTER_REGNUM);
8356 if (from == ARG_POINTER_REGNUM)
8357 return frame.stack_pointer_offset;
8359 gcc_assert (from == FRAME_POINTER_REGNUM);
8360 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8364 /* In a dynamically-aligned function, we can't know the offset from
8365 stack pointer to frame pointer, so we must ensure that setjmp
8366 eliminates fp against the hard fp (%ebp) rather than trying to
8367 index from %esp up to the top of the frame across a gap that is
8368 of unknown (at compile-time) size. */
8370 ix86_builtin_setjmp_frame_value (void)
8372 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8375 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8376 field in the TCB, so they cannot be used together. */
8379 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8383 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8385 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8388 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8391 error ("%<-fsplit-stack%> requires "
8392 "assembler support for CFI directives");
8400 /* When using -fsplit-stack, the allocation routines set a field in
8401 the TCB to the bottom of the stack plus this much space, measured in bytes. */
8404 #define SPLIT_STACK_AVAILABLE 256
8406 /* Fill the ix86_frame structure describing the frame of the current function. */
8409 ix86_compute_frame_layout (struct ix86_frame *frame)
8411 unsigned int stack_alignment_needed;
8412 HOST_WIDE_INT offset;
8413 unsigned int preferred_alignment;
8414 HOST_WIDE_INT size = get_frame_size ();
8415 HOST_WIDE_INT to_allocate;
8417 frame->nregs = ix86_nsaved_regs ();
8418 frame->nsseregs = ix86_nsaved_sseregs ();
8420 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8421 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8423 /* The MS ABI seems to require the stack alignment to always be 16, except
8424 for function prologues and leaf functions. */
8425 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8426 && (!current_function_is_leaf || cfun->calls_alloca != 0
8427 || ix86_current_function_calls_tls_descriptor))
8429 preferred_alignment = 16;
8430 stack_alignment_needed = 16;
8431 crtl->preferred_stack_boundary = 128;
8432 crtl->stack_alignment_needed = 128;
8435 gcc_assert (!size || stack_alignment_needed);
8436 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8437 gcc_assert (preferred_alignment <= stack_alignment_needed);
8439 /* During reload iteration the number of registers saved can change.
8440 Recompute the value as needed. Do not recompute when the number of registers
8441 didn't change, as reload makes multiple calls to this function and does not
8442 expect the decision to change within a single iteration. */
8443 if (!optimize_function_for_size_p (cfun)
8444 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8446 int count = frame->nregs;
8447 struct cgraph_node *node = cgraph_node (current_function_decl);
8449 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8450 /* The fast prologue uses move instead of push to save registers. This
8451 is significantly longer, but also executes faster as modern hardware
8452 can execute the moves in parallel, but can't do that for push/pop.
8454 Be careful about choosing which prologue to emit: when the function
8455 takes many instructions to execute, we may as well use the slow
8456 version; likewise when the function is known to be outside a hot spot
8457 (known only with profile feedback). Weight the size of the function
8458 by the number of registers to save, as it is cheap to use one or two
8459 push instructions but very slow to use many of them. */
8461 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8462 if (node->frequency < NODE_FREQUENCY_NORMAL
8463 || (flag_branch_probabilities
8464 && node->frequency < NODE_FREQUENCY_HOT))
8465 cfun->machine->use_fast_prologue_epilogue = false;
8467 cfun->machine->use_fast_prologue_epilogue
8468 = !expensive_function_p (count);
8470 if (TARGET_PROLOGUE_USING_MOVE
8471 && cfun->machine->use_fast_prologue_epilogue)
8472 frame->save_regs_using_mov = true;
8474 frame->save_regs_using_mov = false;
8476 /* If static stack checking is enabled and done with probes, the registers
8477 need to be saved before allocating the frame. */
8478 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8479 frame->save_regs_using_mov = false;
8481 /* Skip return address. */
8482 offset = UNITS_PER_WORD;
8484 /* Skip pushed static chain. */
8485 if (ix86_static_chain_on_stack)
8486 offset += UNITS_PER_WORD;
8488 /* Skip saved base pointer. */
8489 if (frame_pointer_needed)
8490 offset += UNITS_PER_WORD;
8492 frame->hard_frame_pointer_offset = offset;
8494 /* Register save area */
8495 offset += frame->nregs * UNITS_PER_WORD;
8496 frame->reg_save_offset = offset;
8498 /* Align and set SSE register save area. */
8499 if (frame->nsseregs)
8501 /* The only ABI that has saved SSE registers (Win64) also has a
8502 16-byte aligned default stack, and thus we don't need to be
8503 within the re-aligned local stack frame to save them. */
8504 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8505 offset = (offset + 16 - 1) & -16;
8506 offset += frame->nsseregs * 16;
8508 frame->sse_reg_save_offset = offset;
8510 /* The re-aligned stack starts here. Values before this point are not
8511 directly comparable with values below this point. In order to make
8512 sure that no value happens to be the same before and after, force
8513 the alignment computation below to add a non-zero value. */
8514 if (stack_realign_fp)
8515 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8518 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8519 offset += frame->va_arg_size;
8521 /* Align start of frame for local function. */
8522 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
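/* This is the usual round-up-to-alignment idiom: with offset == 20 and
   stack_alignment_needed == 16, (20 + 15) & -16 == 32, while an already
   aligned offset such as 32 is left unchanged.  */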
8524 /* Frame pointer points here. */
8525 frame->frame_pointer_offset = offset;
8529 /* Add the outgoing arguments area. It can be skipped if we eliminated
8530 all the function calls as dead code.
8531 Skipping is however impossible when the function calls alloca: the
8532 alloca expander assumes that the last crtl->outgoing_args_size bytes
8533 of the stack frame are unused. */
8534 if (ACCUMULATE_OUTGOING_ARGS
8535 && (!current_function_is_leaf || cfun->calls_alloca
8536 || ix86_current_function_calls_tls_descriptor))
8538 offset += crtl->outgoing_args_size;
8539 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8542 frame->outgoing_arguments_size = 0;
8544 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
8546 if (!current_function_is_leaf || cfun->calls_alloca
8547 || ix86_current_function_calls_tls_descriptor)
8548 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8550 /* We've reached end of stack frame. */
8551 frame->stack_pointer_offset = offset;
8553 /* The size the prologue needs to allocate. */
8554 to_allocate = offset - frame->sse_reg_save_offset;
8556 if ((!to_allocate && frame->nregs <= 1)
8557 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8558 frame->save_regs_using_mov = false;
8560 if (ix86_using_red_zone ()
8561 && current_function_sp_is_unchanging
8562 && current_function_is_leaf
8563 && !ix86_current_function_calls_tls_descriptor)
8565 frame->red_zone_size = to_allocate;
8566 if (frame->save_regs_using_mov)
8567 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8568 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8569 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8572 frame->red_zone_size = 0;
8573 frame->stack_pointer_offset -= frame->red_zone_size;
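/* Worked example, assuming the usual x86_64 values RED_ZONE_SIZE == 128
   and RED_ZONE_RESERVE == 8: with to_allocate == 40 and two registers
   saved via mov, red_zone_size is 40 + 2 * UNITS_PER_WORD == 56, below
   the 120-byte cap, so those 56 bytes need no explicit sub from SP.  */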
8576 /* This is semi-inlined memory_address_length, but simplified
8577 since we know that we're always dealing with reg+offset, and
8578 to avoid having to create and discard all that rtl. */
8581 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8587 /* EBP and R13 cannot be encoded without an offset. */
8588 len = (regno == BP_REG || regno == R13_REG);
8590 else if (IN_RANGE (offset, -128, 127))
8593 /* ESP and R12 must be encoded with a SIB byte. */
8594 if (regno == SP_REG || regno == R12_REG)
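/* Examples of the resulting extra encoding lengths: (%esp, 0) needs
   only a SIB byte (1); (%ebp, 0) needs a disp8 (1); (%eax, 200) needs
   a disp32 (4); (%r12, 200) needs SIB plus disp32 (5).  */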
8600 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8601 The valid base registers are taken from CFUN->MACHINE->FS. */
8604 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8606 const struct machine_function *m = cfun->machine;
8607 rtx base_reg = NULL;
8608 HOST_WIDE_INT base_offset = 0;
8610 if (m->use_fast_prologue_epilogue)
8612 /* Choose the base register most likely to allow the most scheduling
8613 opportunities. Generally FP is valid throughout the function,
8614 while DRAP must be reloaded within the epilogue. But choose either
8615 over the SP due to increased encoding size. */
8619 base_reg = hard_frame_pointer_rtx;
8620 base_offset = m->fs.fp_offset - cfa_offset;
8622 else if (m->fs.drap_valid)
8624 base_reg = crtl->drap_reg;
8625 base_offset = 0 - cfa_offset;
8627 else if (m->fs.sp_valid)
8629 base_reg = stack_pointer_rtx;
8630 base_offset = m->fs.sp_offset - cfa_offset;
8635 HOST_WIDE_INT toffset;
8638 /* Choose the base register with the smallest address encoding.
8639 With a tie, choose FP > DRAP > SP. */
8642 base_reg = stack_pointer_rtx;
8643 base_offset = m->fs.sp_offset - cfa_offset;
8644 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8646 if (m->fs.drap_valid)
8648 toffset = 0 - cfa_offset;
8649 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8652 base_reg = crtl->drap_reg;
8653 base_offset = toffset;
8659 toffset = m->fs.fp_offset - cfa_offset;
8660 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8663 base_reg = hard_frame_pointer_rtx;
8664 base_offset = toffset;
8669 gcc_assert (base_reg != NULL);
8671 return plus_constant (base_reg, base_offset);
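/* For instance, if only the frame pointer is valid with fp_offset == 16,
   a request for cfa_offset == 24 returns (plus %ebp -8), i.e. the slot
   8 bytes below the frame pointer.  */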
8674 /* Emit code to save registers in the prologue. */
8677 ix86_emit_save_regs (void)
8682 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8683 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8685 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8686 RTX_FRAME_RELATED_P (insn) = 1;
8690 /* Emit a single register save at CFA - CFA_OFFSET. */
8693 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8694 HOST_WIDE_INT cfa_offset)
8696 struct machine_function *m = cfun->machine;
8697 rtx reg = gen_rtx_REG (mode, regno);
8698 rtx mem, addr, base, insn;
8700 addr = choose_baseaddr (cfa_offset);
8701 mem = gen_frame_mem (mode, addr);
8703 /* For SSE saves, we need to indicate the 128-bit alignment. */
8704 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8706 insn = emit_move_insn (mem, reg);
8707 RTX_FRAME_RELATED_P (insn) = 1;
8710 if (GET_CODE (base) == PLUS)
8711 base = XEXP (base, 0);
8712 gcc_checking_assert (REG_P (base));
8714 /* When saving registers into a re-aligned local stack frame, avoid
8715 any tricky guessing by dwarf2out. */
8716 if (m->fs.realigned)
8718 gcc_checking_assert (stack_realign_drap);
8720 if (regno == REGNO (crtl->drap_reg))
8722 /* A bit of a hack. We force the DRAP register to be saved in
8723 the re-aligned stack frame, which provides us with a copy
8724 of the CFA that will last past the prologue. Install it. */
8725 gcc_checking_assert (cfun->machine->fs.fp_valid);
8726 addr = plus_constant (hard_frame_pointer_rtx,
8727 cfun->machine->fs.fp_offset - cfa_offset);
8728 mem = gen_rtx_MEM (mode, addr);
8729 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8733 /* The frame pointer is a stable reference within the
8734 aligned frame. Use it. */
8735 gcc_checking_assert (cfun->machine->fs.fp_valid);
8736 addr = plus_constant (hard_frame_pointer_rtx,
8737 cfun->machine->fs.fp_offset - cfa_offset);
8738 mem = gen_rtx_MEM (mode, addr);
8739 add_reg_note (insn, REG_CFA_EXPRESSION,
8740 gen_rtx_SET (VOIDmode, mem, reg));
8744 /* The memory may not be relative to the current CFA register,
8745 which means that we may need to generate a new pattern for
8746 use by the unwind info. */
8747 else if (base != m->fs.cfa_reg)
8749 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8750 mem = gen_rtx_MEM (mode, addr);
8751 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8755 /* Emit code to save registers using MOV insns.
8756 First register is stored at CFA - CFA_OFFSET. */
8758 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8762 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8763 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8765 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8766 cfa_offset -= UNITS_PER_WORD;
8770 /* Emit code to save SSE registers using MOV insns.
8771 First register is stored at CFA - CFA_OFFSET. */
8773 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8777 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8778 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8780 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8785 static GTY(()) rtx queued_cfa_restores;
8787 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
8788 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8789 Don't add the note if the previously saved value will be left untouched
8790 within the stack red-zone until return, as unwinders can find the same
8791 value in the register and on the stack. */
8794 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8796 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8801 add_reg_note (insn, REG_CFA_RESTORE, reg);
8802 RTX_FRAME_RELATED_P (insn) = 1;
8806 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8809 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8812 ix86_add_queued_cfa_restore_notes (rtx insn)
8815 if (!queued_cfa_restores)
8817 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8819 XEXP (last, 1) = REG_NOTES (insn);
8820 REG_NOTES (insn) = queued_cfa_restores;
8821 queued_cfa_restores = NULL_RTX;
8822 RTX_FRAME_RELATED_P (insn) = 1;
8825 /* Expand prologue or epilogue stack adjustment.
8826 The pattern exists to put a dependency on all ebp-based memory accesses.
8827 STYLE should be negative if instructions should be marked as frame related,
8828 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
8832 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8833 int style, bool set_cfa)
8835 struct machine_function *m = cfun->machine;
8839 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
8840 else if (x86_64_immediate_operand (offset, DImode))
8841 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
8845 /* R11 is used by indirect sibcall return as well: it is set before
8846 the epilogue and used after the epilogue. */
8848 tmp = gen_rtx_REG (DImode, R11_REG);
8851 gcc_assert (src != hard_frame_pointer_rtx
8852 && dest != hard_frame_pointer_rtx);
8853 tmp = hard_frame_pointer_rtx;
8855 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8857 RTX_FRAME_RELATED_P (insn) = 1;
8859 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
8862 insn = emit_insn (insn);
8864 ix86_add_queued_cfa_restore_notes (insn);
8870 gcc_assert (m->fs.cfa_reg == src);
8871 m->fs.cfa_offset += INTVAL (offset);
8872 m->fs.cfa_reg = dest;
8874 r = gen_rtx_PLUS (Pmode, src, offset);
8875 r = gen_rtx_SET (VOIDmode, dest, r);
8876 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8877 RTX_FRAME_RELATED_P (insn) = 1;
8880 RTX_FRAME_RELATED_P (insn) = 1;
8882 if (dest == stack_pointer_rtx)
8884 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8885 bool valid = m->fs.sp_valid;
8887 if (src == hard_frame_pointer_rtx)
8889 valid = m->fs.fp_valid;
8890 ooffset = m->fs.fp_offset;
8892 else if (src == crtl->drap_reg)
8894 valid = m->fs.drap_valid;
8899 /* Else there are two possibilities: SP itself, which we set
8900 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
8901 taken care of by hand along the eh_return path. */
8902 gcc_checking_assert (src == stack_pointer_rtx
8903 || offset == const0_rtx);
8906 m->fs.sp_offset = ooffset - INTVAL (offset);
8907 m->fs.sp_valid = valid;
8911 /* Find an available register to be used as the dynamic realign argument
8912 pointer register. Such a register will be written in the prologue and
8913 used at the beginning of the body, so it must not be
8914 1. a parameter passing register.
8916 We reuse the static-chain register if it is available. Otherwise, we
8917 use DI for i386 and R13 for x86-64. We chose R13 since it has
8920 Return: the regno of the chosen register. */
8923 find_drap_reg (void)
8925 tree decl = cfun->decl;
8929 /* Use R13 for a nested function or a function that needs a static
8930 chain. Since a function with a tail call may use any caller-saved
8931 register in the epilogue, DRAP must not use a caller-saved
8932 register in that case. */
8933 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8940 /* Use DI for a nested function or a function that needs a static
8941 chain. Since a function with a tail call may use any caller-saved
8942 register in the epilogue, DRAP must not use a caller-saved
8943 register in that case. */
8944 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8947 /* Reuse the static chain register if it isn't used for parameter passing. */
8949 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8950 && !lookup_attribute ("fastcall",
8951 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8952 && !lookup_attribute ("thiscall",
8953 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8960 /* Return minimum incoming stack alignment. */
8963 ix86_minimum_incoming_stack_boundary (bool sibcall)
8965 unsigned int incoming_stack_boundary;
8967 /* Prefer the one specified at command line. */
8968 if (ix86_user_incoming_stack_boundary)
8969 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8970 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8971 when -mstackrealign is used, this isn't a sibcall check, and the
8972 estimated stack alignment is 128 bits. */
8975 && ix86_force_align_arg_pointer
8976 && crtl->stack_alignment_estimated == 128)
8977 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8979 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8981 /* Incoming stack alignment can be changed on individual functions
8982 via force_align_arg_pointer attribute. We use the smallest
8983 incoming stack boundary. */
8984 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8985 && lookup_attribute (ix86_force_align_arg_pointer_string,
8986 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8987 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8989 /* The incoming stack frame has to be aligned at least at
8990 parm_stack_boundary. */
8991 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8992 incoming_stack_boundary = crtl->parm_stack_boundary;
8994 /* The stack at the entrance of main is aligned by the runtime. We use
8995 the smallest incoming stack boundary. */
8996 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8997 && DECL_NAME (current_function_decl)
8998 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8999 && DECL_FILE_SCOPE_P (current_function_decl))
9000 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9002 return incoming_stack_boundary;
9005 /* Update incoming stack boundary and estimated stack alignment. */
9008 ix86_update_stack_boundary (void)
9010 ix86_incoming_stack_boundary
9011 = ix86_minimum_incoming_stack_boundary (false);
9013 /* The x86_64 vararg register save area needs 16-byte stack alignment. */
9017 && crtl->stack_alignment_estimated < 128)
9018 crtl->stack_alignment_estimated = 128;
9021 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9022 needed or an rtx for DRAP otherwise. */
9025 ix86_get_drap_rtx (void)
9027 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9028 crtl->need_drap = true;
9030 if (stack_realign_drap)
9032 /* Assign DRAP to vDRAP and return vDRAP. */
9033 unsigned int regno = find_drap_reg ();
9038 arg_ptr = gen_rtx_REG (Pmode, regno);
9039 crtl->drap_reg = arg_ptr;
9042 drap_vreg = copy_to_reg (arg_ptr);
9046 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9049 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9050 RTX_FRAME_RELATED_P (insn) = 1;
9058 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9061 ix86_internal_arg_pointer (void)
9063 return virtual_incoming_args_rtx;
9066 struct scratch_reg {
9071 /* Return a short-lived scratch register for use on function entry.
9072 In 32-bit mode, it is valid only after the registers are saved
9073 in the prologue. This register must be released by means of
9074 release_scratch_register_on_entry once it is dead. */
9077 get_scratch_register_on_entry (struct scratch_reg *sr)
9085 /* We always use R11 in 64-bit mode. */
9090 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9092 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9093 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9094 int regparm = ix86_function_regparm (fntype, decl);
9096 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9098 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9099 for the static chain register. */
9100 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9101 && drap_regno != AX_REG)
9103 else if (regparm < 2 && drap_regno != DX_REG)
9105 /* ecx is the static chain register. */
9106 else if (regparm < 3 && !fastcall_p && !static_chain_p
9107 && drap_regno != CX_REG)
9109 else if (ix86_save_reg (BX_REG, true))
9111 /* esi is the static chain register. */
9112 else if (!(regparm == 3 && static_chain_p)
9113 && ix86_save_reg (SI_REG, true))
9115 else if (ix86_save_reg (DI_REG, true))
9119 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9124 sr->reg = gen_rtx_REG (Pmode, regno);
9127 rtx insn = emit_insn (gen_push (sr->reg));
9128 RTX_FRAME_RELATED_P (insn) = 1;
9132 /* Release a scratch register obtained from the preceding function. */
9135 release_scratch_register_on_entry (struct scratch_reg *sr)
9139 rtx x, insn = emit_insn (gen_pop (sr->reg));
9141 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9142 RTX_FRAME_RELATED_P (insn) = 1;
9143 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9144 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9145 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9149 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
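/* With the typical STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is 4096
   bytes, i.e. one probe per page.  */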
9151 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9154 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9156 /* We skip the probe for the first interval + a small dope of 4 words and
9157 probe that many bytes past the specified size to maintain a protection
9158 area at the bottom of the stack. */
9159 const int dope = 4 * UNITS_PER_WORD;
9160 rtx size_rtx = GEN_INT (size);
9162 /* See if we have a constant small number of probes to generate. If so,
9163 that's the easy case. The run-time loop is made up of 11 insns in the
9164 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9165 for n # of intervals. */
9166 if (size <= 5 * PROBE_INTERVAL)
9168 HOST_WIDE_INT i, adjust;
9169 bool first_probe = true;
9171 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9172 values of N from 1 until it exceeds SIZE. If only one probe is
9173 needed, this will not generate any code. Then adjust and probe
9174 to PROBE_INTERVAL + SIZE. */
9175 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9179 adjust = 2 * PROBE_INTERVAL + dope;
9180 first_probe = false;
9183 adjust = PROBE_INTERVAL;
9185 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9186 plus_constant (stack_pointer_rtx, -adjust)));
9187 emit_stack_probe (stack_pointer_rtx);
9191 adjust = size + PROBE_INTERVAL + dope;
9193 adjust = size + PROBE_INTERVAL - i;
9195 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9196 plus_constant (stack_pointer_rtx, -adjust)));
9197 emit_stack_probe (stack_pointer_rtx);
9199 /* Adjust back to account for the additional first interval. */
9200 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9201 plus_constant (stack_pointer_rtx,
9202 PROBE_INTERVAL + dope)));
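/* E.g. for size == 2 * PROBE_INTERVAL the code above emits: sub
   (2*PROBE_INTERVAL + dope) / probe, sub PROBE_INTERVAL / probe, then
   add (PROBE_INTERVAL + dope), leaving SP lowered by exactly
   2 * PROBE_INTERVAL.  */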
9205 /* Otherwise, do the same as above, but in a loop. Note that we must be
9206 extra careful with variables wrapping around because we might be at
9207 the very top (or the very bottom) of the address space and we have
9208 to be able to handle this case properly; in particular, we use an
9209 equality test for the loop condition. */
9212 HOST_WIDE_INT rounded_size;
9213 struct scratch_reg sr;
9215 get_scratch_register_on_entry (&sr);
9218 /* Step 1: round SIZE to the previous multiple of the interval. */
9220 rounded_size = size & -PROBE_INTERVAL;
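/* E.g. with PROBE_INTERVAL == 4096, a size of 10000 rounds down to
   8192; the remaining 1808 bytes are handled by step 4 below.  */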
9223 /* Step 2: compute initial and final value of the loop counter. */
9225 /* SP = SP_0 + PROBE_INTERVAL. */
9226 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9227 plus_constant (stack_pointer_rtx,
9228 - (PROBE_INTERVAL + dope))));
9230 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9231 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9232 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9233 gen_rtx_PLUS (Pmode, sr.reg,
9234 stack_pointer_rtx)));
9239 while (SP != LAST_ADDR)
9241 SP = SP + PROBE_INTERVAL
9245 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9246 values of N from 1 until it is equal to ROUNDED_SIZE. */
9248 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9251 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9252 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9254 if (size != rounded_size)
9256 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9257 plus_constant (stack_pointer_rtx,
9258 rounded_size - size)));
9259 emit_stack_probe (stack_pointer_rtx);
9262 /* Adjust back to account for the additional first interval. */
9263 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9264 plus_constant (stack_pointer_rtx,
9265 PROBE_INTERVAL + dope)));
9267 release_scratch_register_on_entry (&sr);
9270 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9271 cfun->machine->fs.sp_offset += size;
9273 /* Make sure nothing is scheduled before we are done. */
9274 emit_insn (gen_blockage ());
9277 /* Adjust the stack pointer up to REG while probing it. */
9280 output_adjust_stack_and_probe (rtx reg)
9282 static int labelno = 0;
9283 char loop_lab[32], end_lab[32];
9286 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9287 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9289 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9291 /* Jump to END_LAB if SP == LAST_ADDR. */
9292 xops[0] = stack_pointer_rtx;
9294 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9295 fputs ("\tje\t", asm_out_file);
9296 assemble_name_raw (asm_out_file, end_lab);
9297 fputc ('\n', asm_out_file);
9299 /* SP = SP + PROBE_INTERVAL. */
9300 xops[1] = GEN_INT (PROBE_INTERVAL);
9301 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9304 xops[1] = const0_rtx;
9305 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
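/* The "or $0" above is semantically a no-op, but being a
   read-modify-write it forces a real access to the probed address,
   faulting (or growing the guard page) if the stack isn't mapped
   there.  */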
9307 fprintf (asm_out_file, "\tjmp\t");
9308 assemble_name_raw (asm_out_file, loop_lab);
9309 fputc ('\n', asm_out_file);
9311 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9316 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9317 inclusive. These are offsets from the current stack pointer. */
9320 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9322 /* See if we have a constant small number of probes to generate. If so,
9323 that's the easy case. The run-time loop is made up of 7 insns in the
9324 generic case while the compile-time loop is made up of n insns for n # of intervals. */
9326 if (size <= 7 * PROBE_INTERVAL)
9330 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9331 it exceeds SIZE. If only one probe is needed, this will not
9332 generate any code. Then probe at FIRST + SIZE. */
9333 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9334 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9336 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9339 /* Otherwise, do the same as above, but in a loop. Note that we must be
9340 extra careful with variables wrapping around because we might be at
9341 the very top (or the very bottom) of the address space and we have
9342 to be able to handle this case properly; in particular, we use an
9343 equality test for the loop condition. */
9346 HOST_WIDE_INT rounded_size, last;
9347 struct scratch_reg sr;
9349 get_scratch_register_on_entry (&sr);
9352 /* Step 1: round SIZE to the previous multiple of the interval. */
9354 rounded_size = size & -PROBE_INTERVAL;
9357 /* Step 2: compute initial and final value of the loop counter. */
9359 /* TEST_OFFSET = FIRST. */
9360 emit_move_insn (sr.reg, GEN_INT (-first));
9362 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9363 last = first + rounded_size;
9368 while (TEST_ADDR != LAST_ADDR)
9370 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9374 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9375 until it is equal to ROUNDED_SIZE. */
9377 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9380 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9381 that SIZE is equal to ROUNDED_SIZE. */
9383 if (size != rounded_size)
9384 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9387 rounded_size - size));
9389 release_scratch_register_on_entry (&sr);
9392 /* Make sure nothing is scheduled before we are done. */
9393 emit_insn (gen_blockage ());
9396 /* Probe a range of stack addresses from REG to END, inclusive. These are
9397 offsets from the current stack pointer. */
9400 output_probe_stack_range (rtx reg, rtx end)
9402 static int labelno = 0;
9403 char loop_lab[32], end_lab[32];
9406 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9407 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9409 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9411 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9414 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9415 fputs ("\tje\t", asm_out_file);
9416 assemble_name_raw (asm_out_file, end_lab);
9417 fputc ('\n', asm_out_file);
9419 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9420 xops[1] = GEN_INT (PROBE_INTERVAL);
9421 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9423 /* Probe at TEST_ADDR. */
9424 xops[0] = stack_pointer_rtx;
9426 xops[2] = const0_rtx;
9427 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9429 fprintf (asm_out_file, "\tjmp\t");
9430 assemble_name_raw (asm_out_file, loop_lab);
9431 fputc ('\n', asm_out_file);
9433 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9438 /* Finalize the stack_realign_needed flag, which guides the prologue and
9439 epilogue to be generated in the correct form. */
9441 ix86_finalize_stack_realign_flags (void)
9443 /* Check if stack realignment is really needed after reload, and
9444 store the result in cfun. */
9445 unsigned int incoming_stack_boundary
9446 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9447 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9448 unsigned int stack_realign = (incoming_stack_boundary
9449 < (current_function_is_leaf
9450 ? crtl->max_used_stack_slot_alignment
9451 : crtl->stack_alignment_needed));
9453 if (crtl->stack_realign_finalized)
9455 /* After stack_realign_needed is finalized, we can no longer update it. */
9457 gcc_assert (crtl->stack_realign_needed == stack_realign);
9461 crtl->stack_realign_needed = stack_realign;
9462 crtl->stack_realign_finalized = true;
9466 /* Expand the prologue into a bunch of separate insns. */
9469 ix86_expand_prologue (void)
9471 struct machine_function *m = cfun->machine;
9474 struct ix86_frame frame;
9475 HOST_WIDE_INT allocate;
9476 bool int_registers_saved;
9478 ix86_finalize_stack_realign_flags ();
9480 /* DRAP should not coexist with stack_realign_fp */
9481 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9483 memset (&m->fs, 0, sizeof (m->fs));
9485 /* Initialize CFA state for before the prologue. */
9486 m->fs.cfa_reg = stack_pointer_rtx;
9487 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9489 /* Track SP offset to the CFA. We continue tracking this after we've
9490 swapped the CFA register away from SP. In the case of re-alignment
9491 this is fudged; we're interested in offsets within the local frame. */
9492 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9493 m->fs.sp_valid = true;
9495 ix86_compute_frame_layout (&frame);
9497 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9499 /* We should have already generated an error for any use of
9500 ms_hook on a nested function. */
9501 gcc_checking_assert (!ix86_static_chain_on_stack);
9503 /* Check if profiling is active and whether we shall use the
9504 profiling-before-prologue variant. If so, issue a sorry. */
9505 if (crtl->profile && flag_fentry != 0)
9506 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9508 /* In ix86_asm_output_function_label we emitted:
9509 8b ff movl.s %edi,%edi
9511 8b ec movl.s %esp,%ebp
9513 This matches the hookable function prologue in Win32 API
9514 functions in Microsoft Windows XP Service Pack 2 and newer.
9515 Wine uses this to enable Windows apps to hook the Win32 API
9516 functions provided by Wine.
9518 What that means is that we've already set up the frame pointer. */
9520 if (frame_pointer_needed
9521 && !(crtl->drap_reg && crtl->stack_realign_needed))
9525 /* We've decided to use the frame pointer already set up.
9526 Describe this to the unwinder by pretending that both
9527 push and mov insns happen right here.
9529 Putting the unwind info here at the end of the ms_hook
9530 is done so that we can make absolutely certain we get
9531 the required byte sequence at the start of the function,
9532 rather than relying on an assembler that can produce
9533 the exact encoding required.
9535 However it does mean (in the unpatched case) that we have
9536 a 1 insn window where the asynchronous unwind info is
9537 incorrect. However, if we placed the unwind info at
9538 its correct location we would have incorrect unwind info
9539 in the patched case. Which is probably all moot since
9540 I don't expect Wine generates dwarf2 unwind info for the
9541 system libraries that use this feature. */
9543 insn = emit_insn (gen_blockage ());
9545 push = gen_push (hard_frame_pointer_rtx);
9546 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9548 RTX_FRAME_RELATED_P (push) = 1;
9549 RTX_FRAME_RELATED_P (mov) = 1;
9551 RTX_FRAME_RELATED_P (insn) = 1;
9552 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9553 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9555 /* Note that gen_push incremented m->fs.cfa_offset, even
9556 though we didn't emit the push insn here. */
9557 m->fs.cfa_reg = hard_frame_pointer_rtx;
9558 m->fs.fp_offset = m->fs.cfa_offset;
9559 m->fs.fp_valid = true;
9563 /* The frame pointer is not needed so pop %ebp again.
9564 This leaves us with a pristine state. */
9565 emit_insn (gen_pop (hard_frame_pointer_rtx));
9569 /* The first insn of a function that accepts its static chain on the
9570 stack is to push the register that would be filled in by a direct
9571 call. This insn will be skipped by the trampoline. */
9572 else if (ix86_static_chain_on_stack)
9574 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9575 emit_insn (gen_blockage ());
9577 /* We don't want to interpret this push insn as a register save,
9578 only as a stack adjustment. The real copy of the register as
9579 a save will be done later, if needed. */
9580 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9581 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9582 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9583 RTX_FRAME_RELATED_P (insn) = 1;
9586 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9587 DRAP is needed and stack realignment is really needed after reload. */
9588 if (stack_realign_drap)
9590 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9592 /* Only need to push the parameter pointer reg if it is caller-saved. */
9593 if (!call_used_regs[REGNO (crtl->drap_reg)])
9595 /* Push the arg pointer reg. */
9596 insn = emit_insn (gen_push (crtl->drap_reg));
9597 RTX_FRAME_RELATED_P (insn) = 1;
9600 /* Grab the argument pointer. */
9601 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9602 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9603 RTX_FRAME_RELATED_P (insn) = 1;
9604 m->fs.cfa_reg = crtl->drap_reg;
9605 m->fs.cfa_offset = 0;
9607 /* Align the stack. */
9608 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9610 GEN_INT (-align_bytes)));
9611 RTX_FRAME_RELATED_P (insn) = 1;
9613 /* Replicate the return address on the stack so that the return
9614 address can be reached via the (argp - 1) slot. This is needed
9615 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9616 expand_builtin_return_addr etc. */
9617 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9618 t = gen_frame_mem (Pmode, t);
9619 insn = emit_insn (gen_push (t));
9620 RTX_FRAME_RELATED_P (insn) = 1;
9622 /* For the purposes of frame and register save area addressing,
9623 we've started over with a new frame. */
9624 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9625 m->fs.realigned = true;
9628 if (frame_pointer_needed && !m->fs.fp_valid)
9630 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9631 slower on all targets. Also sdb doesn't like it. */
9632 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9633 RTX_FRAME_RELATED_P (insn) = 1;
9635 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9636 RTX_FRAME_RELATED_P (insn) = 1;
9638 if (m->fs.cfa_reg == stack_pointer_rtx)
9639 m->fs.cfa_reg = hard_frame_pointer_rtx;
9640 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9641 m->fs.fp_offset = m->fs.sp_offset;
9642 m->fs.fp_valid = true;
9645 int_registers_saved = (frame.nregs == 0);
9647 if (!int_registers_saved)
9649 /* If saving registers via PUSH, do so now. */
9650 if (!frame.save_regs_using_mov)
9652 ix86_emit_save_regs ();
9653 int_registers_saved = true;
9654 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9657 /* When using the red zone we may start register saving before allocating
9658 the stack frame, saving one cycle of the prologue. However, avoid
9659 doing this if we have to probe the stack; at least on x86_64 the
9660 stack probe can turn into a call that clobbers a red zone location. */
9661 else if (ix86_using_red_zone ()
9662 && (! TARGET_STACK_PROBE
9663 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9665 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9666 int_registers_saved = true;
9670 if (stack_realign_fp)
9672 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9673 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9675 /* The computation of the size of the re-aligned stack frame means
9676 that we must allocate the size of the register save area before
9677 performing the actual alignment. Otherwise we cannot guarantee
9678 that there's enough storage above the realignment point. */
9679 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9680 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9681 GEN_INT (m->fs.sp_offset
9682 - frame.sse_reg_save_offset),
9685 /* Align the stack. */
9686 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9688 GEN_INT (-align_bytes)));
9690 /* For the purposes of register save area addressing, the stack
9691 pointer is no longer valid. As for the value of sp_offset,
9692 see ix86_compute_frame_layout, which we need to match in order
9693 to pass verification of stack_pointer_offset at the end. */
9694 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9695 m->fs.sp_valid = false;
9698 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9700 if (flag_stack_usage)
9702 /* We start to count from ARG_POINTER. */
9703 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9705 /* If it was realigned, take into account the fake frame. */
9706 if (stack_realign_drap)
9708 if (ix86_static_chain_on_stack)
9709 stack_size += UNITS_PER_WORD;
9711 if (!call_used_regs[REGNO (crtl->drap_reg)])
9712 stack_size += UNITS_PER_WORD;
9714 /* This over-estimates by 1 minimal-stack-alignment-unit but
9715 mitigates that by counting in the new return address slot. */
9716 current_function_dynamic_stack_size
9717 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9720 current_function_static_stack_size = stack_size;
9723 /* The stack has already been decremented by the instruction calling us
9724 so we need to probe unconditionally to preserve the protection area. */
9725 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9727 /* We expect the registers to be saved when probes are used. */
9728 gcc_assert (int_registers_saved);
9730 if (STACK_CHECK_MOVING_SP)
9732 ix86_adjust_stack_and_probe (allocate);
9737 HOST_WIDE_INT size = allocate;
9739 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9740 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9742 if (TARGET_STACK_PROBE)
9743 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9745 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9751 else if (!ix86_target_stack_probe ()
9752 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9754 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9755 GEN_INT (-allocate), -1,
9756 m->fs.cfa_reg == stack_pointer_rtx);
9760 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9762 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
9764 bool eax_live = false;
9765 bool r10_live = false;
9768 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9769 if (!TARGET_64BIT_MS_ABI)
9770 eax_live = ix86_eax_live_at_start_p ();
9774 emit_insn (gen_push (eax));
9775 allocate -= UNITS_PER_WORD;
9779 r10 = gen_rtx_REG (Pmode, R10_REG);
9780 emit_insn (gen_push (r10));
9781 allocate -= UNITS_PER_WORD;
9784 emit_move_insn (eax, GEN_INT (allocate));
9785 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9787 /* Use the fact that AX still contains ALLOCATE. */
9788 adjust_stack_insn = (TARGET_64BIT
9789 ? gen_pro_epilogue_adjust_stack_di_sub
9790 : gen_pro_epilogue_adjust_stack_si_sub);
9792 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
9793 stack_pointer_rtx, eax));
9795 if (m->fs.cfa_reg == stack_pointer_rtx)
9797 m->fs.cfa_offset += allocate;
9799 RTX_FRAME_RELATED_P (insn) = 1;
9800 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9801 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9802 plus_constant (stack_pointer_rtx,
9805 m->fs.sp_offset += allocate;
9807 if (r10_live && eax_live)
9809 t = choose_baseaddr (m->fs.sp_offset - allocate);
9810 emit_move_insn (r10, gen_frame_mem (Pmode, t));
9811 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
9812 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9814 else if (eax_live || r10_live)
9816 t = choose_baseaddr (m->fs.sp_offset - allocate);
9817 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
9820 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9822 if (!int_registers_saved)
9823 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9825 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9827 pic_reg_used = false;
9828 if (pic_offset_table_rtx
9829 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9832 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9834 if (alt_pic_reg_used != INVALID_REGNUM)
9835 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9837 pic_reg_used = true;
9844 if (ix86_cmodel == CM_LARGE_PIC)
9846 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9847 rtx label = gen_label_rtx ();
9849 LABEL_PRESERVE_P (label) = 1;
9850 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9851 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9852 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9853 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9854 pic_offset_table_rtx, tmp_reg));
9857 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9860 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9863 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
9864 when mcount needs it. Blockage to avoid call movement across the mcount
9865 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END note. */
9867 if (crtl->profile && !flag_fentry && pic_reg_used)
9868 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9870 if (crtl->drap_reg && !crtl->stack_realign_needed)
9872 /* vDRAP is set up, but after reload it turns out stack realignment
9873 isn't necessary; here we emit prologue code to set up DRAP
9874 without the stack realignment adjustment. */
9875 t = choose_baseaddr (0);
9876 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9879 /* Prevent instructions from being scheduled into register save push
9880 sequence when access to the redzone area is done through frame pointer.
9881 The offset between the frame pointer and the stack pointer is calculated
9882 relative to the value of the stack pointer at the end of the function
9883 prologue, and moving instructions that access redzone area via frame
9884 pointer inside push sequence violates this assumption. */
9885 if (frame_pointer_needed && frame.red_zone_size)
9886 emit_insn (gen_memory_blockage ());
9888 /* Emit cld instruction if stringops are used in the function. */
9889 if (TARGET_CLD && ix86_current_function_needs_cld)
9890 emit_insn (gen_cld ());
9893 /* Emit code to restore REG using a POP insn. */
9896 ix86_emit_restore_reg_using_pop (rtx reg)
9898 struct machine_function *m = cfun->machine;
9899 rtx insn = emit_insn (gen_pop (reg));
9901 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9902 m->fs.sp_offset -= UNITS_PER_WORD;
9904 if (m->fs.cfa_reg == crtl->drap_reg
9905 && REGNO (reg) == REGNO (crtl->drap_reg))
9907 /* Previously we'd represented the CFA as an expression
9908 like *(%ebp - 8). We've just popped that value from
9909 the stack, which means we need to reset the CFA to
9910 the drap register. This will remain until we restore
9911 the stack pointer. */
9912 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9913 RTX_FRAME_RELATED_P (insn) = 1;
9915 /* This means that the DRAP register is valid for addressing too. */
9916 m->fs.drap_valid = true;
9920 if (m->fs.cfa_reg == stack_pointer_rtx)
9922 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9923 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9924 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9925 RTX_FRAME_RELATED_P (insn) = 1;
9927 m->fs.cfa_offset -= UNITS_PER_WORD;
9930 /* When the frame pointer is the CFA, and we pop it, we are
9931 swapping back to the stack pointer as the CFA. This happens
9932 for stack frames that don't allocate other data, so we assume
9933 the stack pointer is now pointing at the return address, i.e.
9934 the function entry state, which makes the offset be 1 word. */
9935 if (reg == hard_frame_pointer_rtx)
9937 m->fs.fp_valid = false;
9938 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9940 m->fs.cfa_reg = stack_pointer_rtx;
9941 m->fs.cfa_offset -= UNITS_PER_WORD;
9943 add_reg_note (insn, REG_CFA_DEF_CFA,
9944 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9945 GEN_INT (m->fs.cfa_offset)));
9946 RTX_FRAME_RELATED_P (insn) = 1;
9951 /* Emit code to restore saved registers using POP insns. */
9954 ix86_emit_restore_regs_using_pop (void)
9958 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9959 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9960 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9963 /* Emit code and notes for the LEAVE instruction. */
9966 ix86_emit_leave (void)
9968 struct machine_function *m = cfun->machine;
9969 rtx insn = emit_insn (ix86_gen_leave ());
9971 ix86_add_queued_cfa_restore_notes (insn);
9973 gcc_assert (m->fs.fp_valid);
9974 m->fs.sp_valid = true;
9975 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9976 m->fs.fp_valid = false;
9978 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9980 m->fs.cfa_reg = stack_pointer_rtx;
9981 m->fs.cfa_offset = m->fs.sp_offset;
9983 add_reg_note (insn, REG_CFA_DEF_CFA,
9984 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9985 RTX_FRAME_RELATED_P (insn) = 1;
9986 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9991 /* Emit code to restore saved registers using MOV insns.
9992 First register is restored from CFA - CFA_OFFSET. */
9994 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9995 int maybe_eh_return)
9997 struct machine_function *m = cfun->machine;
10000 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10001 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10003 rtx reg = gen_rtx_REG (Pmode, regno);
10006 mem = choose_baseaddr (cfa_offset);
10007 mem = gen_frame_mem (Pmode, mem);
10008 insn = emit_move_insn (reg, mem);
10010 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10012 /* Previously we'd represented the CFA as an expression
10013 like *(%ebp - 8). We've just reloaded that value from
10014 the stack, which means we need to reset the CFA to
10015 the drap register. This will remain until we restore
10016 the stack pointer. */
10017 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10018 RTX_FRAME_RELATED_P (insn) = 1;
10020 /* This means that the DRAP register is valid for addressing. */
10021 m->fs.drap_valid = true;
10024 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10026 cfa_offset -= UNITS_PER_WORD;
10030 /* Emit code to restore saved SSE registers using MOV insns.
10031 First register is restored from CFA - CFA_OFFSET. */
10033 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10034 int maybe_eh_return)
10036 unsigned int regno;
10038 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10039 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10041 rtx reg = gen_rtx_REG (V4SFmode, regno);
10044 mem = choose_baseaddr (cfa_offset);
10045 mem = gen_rtx_MEM (V4SFmode, mem);
10046 set_mem_align (mem, 128);
10047 emit_move_insn (reg, mem);
10049 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10055 /* Restore function stack, frame, and registers. */
10058 ix86_expand_epilogue (int style)
10060 struct machine_function *m = cfun->machine;
10061 struct machine_frame_state frame_state_save = m->fs;
10062 struct ix86_frame frame;
10063 bool restore_regs_via_mov;
10066 ix86_finalize_stack_realign_flags ();
10067 ix86_compute_frame_layout (&frame);
10069 m->fs.sp_valid = (!frame_pointer_needed
10070 || (current_function_sp_is_unchanging
10071 && !stack_realign_fp));
10072 gcc_assert (!m->fs.sp_valid
10073 || m->fs.sp_offset == frame.stack_pointer_offset);
10075 /* The FP must be valid if the frame pointer is present. */
10076 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10077 gcc_assert (!m->fs.fp_valid
10078 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10080 /* We must have *some* valid pointer to the stack frame. */
10081 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10083 /* The DRAP is never valid at this point. */
10084 gcc_assert (!m->fs.drap_valid);
10086 /* See the comment about red zone and frame
10087 pointer usage in ix86_expand_prologue. */
10088 if (frame_pointer_needed && frame.red_zone_size)
10089 emit_insn (gen_memory_blockage ());
10091 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10092 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10094 /* Determine the CFA offset of the end of the red-zone. */
10095 m->fs.red_zone_offset = 0;
10096 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10098 /* The red-zone begins below the return address. */
10099 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10101 /* When the register save area is in the aligned portion of
10102 the stack, determine the maximum runtime displacement that
10103 matches up with the aligned frame. */
10104 if (stack_realign_drap)
10105 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10109 /* Special care must be taken for the normal return case of a function
10110 using eh_return: the eax and edx registers are marked as saved, but
10111 not restored along this path. Adjust the save location to match. */
10112 if (crtl->calls_eh_return && style != 2)
10113 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10115 /* If we're only restoring one register and sp is not valid then
10116 use a move instruction to restore the register, since it's
10117 less work than reloading sp and popping the register. */
10118 if (!m->fs.sp_valid && frame.nregs <= 1)
10119 restore_regs_via_mov = true;
10120 /* EH_RETURN requires the use of moves to function properly. */
10121 else if (crtl->calls_eh_return)
10122 restore_regs_via_mov = true;
10123 else if (TARGET_EPILOGUE_USING_MOVE
10124 && cfun->machine->use_fast_prologue_epilogue
10125 && (frame.nregs > 1
10126 || m->fs.sp_offset != frame.reg_save_offset))
10127 restore_regs_via_mov = true;
10128 else if (frame_pointer_needed
10130 && m->fs.sp_offset != frame.reg_save_offset)
10131 restore_regs_via_mov = true;
10132 else if (frame_pointer_needed
10133 && TARGET_USE_LEAVE
10134 && cfun->machine->use_fast_prologue_epilogue
10135 && frame.nregs == 1)
10136 restore_regs_via_mov = true;
10138 restore_regs_via_mov = false;
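/* For illustration only: the choice above is between restore sequences
   shaped like (hypothetical 32-bit frame)

	movl	-8(%ebp), %ebx		/* restore_regs_via_mov */
   and
	popl	%ebx			/* pop-based restore */

   The mov form works even when %esp does not point at the save area;
   the pop form is smaller but requires %esp to be positioned first.  */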
10140 if (restore_regs_via_mov || frame.nsseregs)
10142 /* Ensure that the entire register save area is addressable via
10143 the stack pointer, if we will restore via sp. */
10145 && m->fs.sp_offset > 0x7fffffff
10146 && !(m->fs.fp_valid || m->fs.drap_valid)
10147 && (frame.nsseregs + frame.nregs) != 0)
10149 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10150 GEN_INT (m->fs.sp_offset
10151 - frame.sse_reg_save_offset),
10153 m->fs.cfa_reg == stack_pointer_rtx);
10157 /* If there are any SSE registers to restore, then we have to do it
10158 via moves, since there's obviously no pop for SSE regs. */
10159 if (frame.nsseregs)
10160 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10163 if (restore_regs_via_mov)
10168 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10170 /* eh_return epilogues need %ecx added to the stack pointer. */
10173 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10175 /* Stack align doesn't work with eh_return. */
10176 gcc_assert (!stack_realign_drap);
10177 /* Neither do regparm nested functions. */
10178 gcc_assert (!ix86_static_chain_on_stack);
10180 if (frame_pointer_needed)
10182 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10183 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10184 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10186 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10187 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10189 /* Note that we use SA as a temporary CFA, as the return
10190 address is at the proper place relative to it. We
10191 pretend this happens at the FP restore insn because
10192 prior to this insn the FP would be stored at the wrong
10193 offset relative to SA, and after this insn we have no
10194 other reasonable register to use for the CFA. We don't
10195 bother resetting the CFA to the SP for the duration of
10196 the return insn. */
10197 add_reg_note (insn, REG_CFA_DEF_CFA,
10198 plus_constant (sa, UNITS_PER_WORD));
10199 ix86_add_queued_cfa_restore_notes (insn);
10200 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10201 RTX_FRAME_RELATED_P (insn) = 1;
10203 m->fs.cfa_reg = sa;
10204 m->fs.cfa_offset = UNITS_PER_WORD;
10205 m->fs.fp_valid = false;
10207 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10208 const0_rtx, style, false);
10212 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10213 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10214 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10215 ix86_add_queued_cfa_restore_notes (insn);
10217 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10218 if (m->fs.cfa_offset != UNITS_PER_WORD)
10220 m->fs.cfa_offset = UNITS_PER_WORD;
10221 add_reg_note (insn, REG_CFA_DEF_CFA,
10222 plus_constant (stack_pointer_rtx,
10224 RTX_FRAME_RELATED_P (insn) = 1;
10227 m->fs.sp_offset = UNITS_PER_WORD;
10228 m->fs.sp_valid = true;
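/* For illustration only: in the frame-pointer case above, the net
   effect on the machine state is

	%ecx = %ebp + %ecx + (fs.fp_offset - UNITS_PER_WORD)
	%ebp = *(%ebp)
	%esp = %ecx

   so the EH stack-adjust register becomes the new stack pointer with
   the runtime's adjustment already folded in.  Register names are the
   32-bit spellings; EH_RETURN_STACKADJ_RTX is %ecx there.  */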
10233 /* First step is to deallocate the stack frame so that we can
10234 pop the registers. */
10235 if (!m->fs.sp_valid)
10237 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10238 GEN_INT (m->fs.fp_offset
10239 - frame.reg_save_offset),
10242 else if (m->fs.sp_offset != frame.reg_save_offset)
10244 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10245 GEN_INT (m->fs.sp_offset
10246 - frame.reg_save_offset),
10248 m->fs.cfa_reg == stack_pointer_rtx);
10251 ix86_emit_restore_regs_using_pop ();
10254 /* If we used a frame pointer and haven't already got rid of it, then do so now. */
10256 if (m->fs.fp_valid)
10258 /* If the stack pointer is valid and pointing at the frame
10259 pointer store address, then we only need a pop. */
10260 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10261 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10262 /* Leave results in shorter dependency chains on CPUs that are
10263 able to grok it fast. */
10264 else if (TARGET_USE_LEAVE
10265 || optimize_function_for_size_p (cfun)
10266 || !cfun->machine->use_fast_prologue_epilogue)
10267 ix86_emit_leave ();
10270 pro_epilogue_adjust_stack (stack_pointer_rtx,
10271 hard_frame_pointer_rtx,
10272 const0_rtx, style, !using_drap);
10273 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
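/* For illustration only: the three shapes the frame-pointer discard
   above can take are (hypothetical 32-bit spellings)

	popl	%ebp			/* %esp already at the save slot */

	leave				/* TARGET_USE_LEAVE et al. */

	movl	%ebp, %esp		/* explicit reload, then pop */
	popl	%ebp
   */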
10279 int param_ptr_offset = UNITS_PER_WORD;
10282 gcc_assert (stack_realign_drap);
10284 if (ix86_static_chain_on_stack)
10285 param_ptr_offset += UNITS_PER_WORD;
10286 if (!call_used_regs[REGNO (crtl->drap_reg)])
10287 param_ptr_offset += UNITS_PER_WORD;
10289 insn = emit_insn (gen_rtx_SET
10290 (VOIDmode, stack_pointer_rtx,
10291 gen_rtx_PLUS (Pmode,
10293 GEN_INT (-param_ptr_offset))));
10294 m->fs.cfa_reg = stack_pointer_rtx;
10295 m->fs.cfa_offset = param_ptr_offset;
10296 m->fs.sp_offset = param_ptr_offset;
10297 m->fs.realigned = false;
10299 add_reg_note (insn, REG_CFA_DEF_CFA,
10300 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10301 GEN_INT (param_ptr_offset)));
10302 RTX_FRAME_RELATED_P (insn) = 1;
10304 if (!call_used_regs[REGNO (crtl->drap_reg)])
10305 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10308 /* At this point the stack pointer must be valid, and we must have
10309 restored all of the registers. We may not have deallocated the
10310 entire stack frame. We've delayed this until now because it may
10311 be possible to merge the local stack deallocation with the
10312 deallocation forced by ix86_static_chain_on_stack. */
10313 gcc_assert (m->fs.sp_valid);
10314 gcc_assert (!m->fs.fp_valid);
10315 gcc_assert (!m->fs.realigned);
10316 if (m->fs.sp_offset != UNITS_PER_WORD)
10318 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10319 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10323 /* Sibcall epilogues don't want a return instruction. */
10326 m->fs = frame_state_save;
10330 if (crtl->args.pops_args && crtl->args.size)
10332 rtx popc = GEN_INT (crtl->args.pops_args);
10334 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10335 address, do explicit add, and jump indirectly to the caller. */
10337 if (crtl->args.pops_args >= 65536)
10339 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10342 /* There is no "pascal" calling convention in any 64bit ABI. */
10343 gcc_assert (!TARGET_64BIT);
10345 insn = emit_insn (gen_pop (ecx));
10346 m->fs.cfa_offset -= UNITS_PER_WORD;
10347 m->fs.sp_offset -= UNITS_PER_WORD;
10349 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10350 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10351 add_reg_note (insn, REG_CFA_REGISTER,
10352 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10353 RTX_FRAME_RELATED_P (insn) = 1;
10355 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10357 emit_jump_insn (gen_return_indirect_internal (ecx));
10360 emit_jump_insn (gen_return_pop_internal (popc));
10363 emit_jump_insn (gen_return_internal ());
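/* For illustration only: a function popping 8 bytes of arguments
   returns with

	ret	$8

   while the >= 64K case above pops the return address into %ecx,
   adjusts %esp explicitly, and ends with an indirect "jmp *%ecx".  */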
10365 /* Restore the state to what it was after the prologue,
10366 so that it's correct for the next epilogue. */
10367 m->fs = frame_state_save;
10370 /* Reset from the function's potential modifications. */
10373 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10374 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10376 if (pic_offset_table_rtx)
10377 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10379 /* Mach-O doesn't support labels at the end of objects, so if
10380 it looks like we might want one, insert a NOP. */
10382 rtx insn = get_last_insn ();
10385 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10386 insn = PREV_INSN (insn);
10390 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10391 fputs ("\tnop\n", file);
10397 /* Return a scratch register to use in the split stack prologue. The
10398 split stack prologue is used for -fsplit-stack. It contains the first
10399 instructions in the function, even before the regular prologue.
10400 The scratch register can be any caller-saved register which is not
10401 used for parameters or for the static chain. */
10403 static unsigned int
10404 split_stack_prologue_scratch_regno (void)
10413 is_fastcall = (lookup_attribute ("fastcall",
10414 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10416 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10420 if (DECL_STATIC_CHAIN (cfun->decl))
10422 sorry ("-fsplit-stack does not support fastcall with "
10423 "nested function");
10424 return INVALID_REGNUM;
10428 else if (regparm < 3)
10430 if (!DECL_STATIC_CHAIN (cfun->decl))
10436 sorry ("-fsplit-stack does not support 2 register "
10437 " parameters for a nested function");
10438 return INVALID_REGNUM;
10445 /* FIXME: We could make this work by pushing a register
10446 around the addition and comparison. */
10447 sorry ("-fsplit-stack does not support 3 register parameters");
10448 return INVALID_REGNUM;
10453 /* A SYMBOL_REF for the function which allocates new stack space for -fsplit-stack. */
10456 static GTY(()) rtx split_stack_fn;
10458 /* Handle -fsplit-stack. These are the first instructions in the
10459 function, even before the regular prologue. */
10462 ix86_expand_split_stack_prologue (void)
10464 struct ix86_frame frame;
10465 HOST_WIDE_INT allocate;
10467 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10468 rtx scratch_reg = NULL_RTX;
10469 rtx varargs_label = NULL_RTX;
10471 gcc_assert (flag_split_stack && reload_completed);
10473 ix86_finalize_stack_realign_flags ();
10474 ix86_compute_frame_layout (&frame);
10475 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10477 /* This is the label we will branch to if we have enough stack
10478 space. We expect the basic block reordering pass to reverse this
10479 branch if optimizing, so that we branch in the unlikely case. */
10480 label = gen_label_rtx ();
10482 /* We need to compare the stack pointer minus the frame size with
10483 the stack boundary in the TCB. The stack boundary always gives
10484 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10485 can compare directly. Otherwise we need to do an addition. */
10487 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10488 UNSPEC_STACK_CHECK);
10489 limit = gen_rtx_CONST (Pmode, limit);
10490 limit = gen_rtx_MEM (Pmode, limit);
10491 if (allocate < SPLIT_STACK_AVAILABLE)
10492 current = stack_pointer_rtx;
10495 unsigned int scratch_regno;
10498 /* We need a scratch register to hold the stack pointer minus
10499 the required frame size. Since this is the very start of the
10500 function, the scratch register can be any caller-saved
10501 register which is not used for parameters. */
10502 offset = GEN_INT (- allocate);
10503 scratch_regno = split_stack_prologue_scratch_regno ();
10504 if (scratch_regno == INVALID_REGNUM)
10506 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10507 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10509 /* We don't use ix86_gen_add3 in this case because it will
10510 want to split to lea, but when not optimizing the insn
10511 will not be split after this point. */
10512 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10513 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10518 emit_move_insn (scratch_reg, offset);
10519 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10520 stack_pointer_rtx));
10522 current = scratch_reg;
10525 ix86_expand_branch (GEU, current, limit, label);
10526 jump_insn = get_last_insn ();
10527 JUMP_LABEL (jump_insn) = label;
10529 /* Mark the jump as very likely to be taken. */
10530 add_reg_note (jump_insn, REG_BR_PROB,
10531 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10533 /* Get more stack space. We pass in the desired stack space and the
10534 size of the arguments to copy to the new stack. In 32-bit mode
10535 we push the parameters; __morestack will return on a new stack
10536 anyhow. In 64-bit mode we pass the parameters in r10 and r11. */
10538 allocate_rtx = GEN_INT (allocate);
10539 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10540 call_fusage = NULL_RTX;
10545 reg = gen_rtx_REG (Pmode, R10_REG);
10547 /* If this function uses a static chain, it will be in %r10.
10548 Preserve it across the call to __morestack. */
10549 if (DECL_STATIC_CHAIN (cfun->decl))
10553 rax = gen_rtx_REG (Pmode, AX_REG);
10554 emit_move_insn (rax, reg);
10555 use_reg (&call_fusage, rax);
10558 emit_move_insn (reg, allocate_rtx);
10559 use_reg (&call_fusage, reg);
10560 reg = gen_rtx_REG (Pmode, R11_REG);
10561 emit_move_insn (reg, GEN_INT (args_size));
10562 use_reg (&call_fusage, reg);
10566 emit_insn (gen_push (GEN_INT (args_size)));
10567 emit_insn (gen_push (allocate_rtx));
10569 if (split_stack_fn == NULL_RTX)
10570 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10571 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10572 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10574 add_function_usage_to (call_insn, call_fusage);
10576 /* In order to make call/return prediction work right, we now need
10577 to execute a return instruction. See
10578 libgcc/config/i386/morestack.S for the details on how this works.
10580 For flow purposes gcc must not see this as a return
10581 instruction--we need control flow to continue at the subsequent
10582 label. Therefore, we use an unspec. */
10583 gcc_assert (crtl->args.pops_args < 65536);
10584 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10586 /* If we are in 64-bit mode and this function uses a static chain,
10587 we saved %r10 in %rax before calling __morestack. */
10588 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10589 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10590 gen_rtx_REG (Pmode, AX_REG));
10592 /* If this function calls va_start, we need to store a pointer to
10593 the arguments on the old stack, because they may not have been
10594 all copied to the new stack. At this point the old stack can be
10595 found at the frame pointer value used by __morestack, because
10596 __morestack has set that up before calling back to us. Here we
10597 store that pointer in a scratch register, and in
10598 ix86_expand_prologue we store the scratch register in a stack slot. */
10600 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10602 unsigned int scratch_regno;
10606 scratch_regno = split_stack_prologue_scratch_regno ();
10607 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10608 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10612 return address within this function
10613 return address of caller of this function
10615 So we add three words to get to the stack arguments.
10619 return address within this function
10620 first argument to __morestack
10621 second argument to __morestack
10622 return address of caller of this function
10624 So we add five words to get to the stack arguments.
10626 words = TARGET_64BIT ? 3 : 5;
10627 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10628 gen_rtx_PLUS (Pmode, frame_reg,
10629 GEN_INT (words * UNITS_PER_WORD))));
10631 varargs_label = gen_label_rtx ();
10632 emit_jump_insn (gen_jump (varargs_label));
10633 JUMP_LABEL (get_last_insn ()) = varargs_label;
10638 emit_label (label);
10639 LABEL_NUSES (label) = 1;
10641 /* If this function calls va_start, we now have to set the scratch
10642 register for the case where we do not call __morestack. In this
10643 case we need to set it based on the stack pointer. */
10644 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10646 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10647 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10648 GEN_INT (UNITS_PER_WORD))));
10650 emit_label (varargs_label);
10651 LABEL_NUSES (varargs_label) = 1;
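/* For illustration only: the 32-bit prologue built above is shaped
   roughly like (offset and label names hypothetical)

	cmpl	%gs:OFFSET, %esp	/* TCB stack bound, UNSPEC_STACK_CHECK */
	jae	.Lenough		/* likely: enough stack, skip the call */
	pushl	$ARGS_SIZE
	pushl	$FRAME_SIZE
	call	__morestack
	ret				/* special return; see morestack.S */
   .Lenough:

   In 64-bit mode the two operands travel in %r10 and %r11 instead of
   being pushed.  */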
10655 /* We may have to tell the dataflow pass that the split stack prologue
10656 is initializing a scratch register. */
10659 ix86_live_on_entry (bitmap regs)
10661 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10663 gcc_assert (flag_split_stack);
10664 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10668 /* Extract the parts of an RTL expression that is a valid memory address
10669 for an instruction. Return 0 if the structure of the address is
10670 grossly off. Return -1 if the address contains ASHIFT, so it is not
10671 strictly valid, but is still used for computing the length of the lea instruction. */
10674 ix86_decompose_address (rtx addr, struct ix86_address *out)
10676 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10677 rtx base_reg, index_reg;
10678 HOST_WIDE_INT scale = 1;
10679 rtx scale_rtx = NULL_RTX;
10682 enum ix86_address_seg seg = SEG_DEFAULT;
10684 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10686 else if (GET_CODE (addr) == PLUS)
10688 rtx addends[4], op;
10696 addends[n++] = XEXP (op, 1);
10699 while (GET_CODE (op) == PLUS);
10704 for (i = n; i >= 0; --i)
10707 switch (GET_CODE (op))
10712 index = XEXP (op, 0);
10713 scale_rtx = XEXP (op, 1);
10719 index = XEXP (op, 0);
10720 tmp = XEXP (op, 1);
10721 if (!CONST_INT_P (tmp))
10723 scale = INTVAL (tmp);
10724 if ((unsigned HOST_WIDE_INT) scale > 3)
10726 scale = 1 << scale;
10730 if (XINT (op, 1) == UNSPEC_TP
10731 && TARGET_TLS_DIRECT_SEG_REFS
10732 && seg == SEG_DEFAULT)
10733 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10762 else if (GET_CODE (addr) == MULT)
10764 index = XEXP (addr, 0); /* index*scale */
10765 scale_rtx = XEXP (addr, 1);
10767 else if (GET_CODE (addr) == ASHIFT)
10769 /* We're called for lea too, which implements ashift on occasion. */
10770 index = XEXP (addr, 0);
10771 tmp = XEXP (addr, 1);
10772 if (!CONST_INT_P (tmp))
10774 scale = INTVAL (tmp);
10775 if ((unsigned HOST_WIDE_INT) scale > 3)
10777 scale = 1 << scale;
10781 disp = addr; /* displacement */
10783 /* Extract the integral value of scale. */
10786 if (!CONST_INT_P (scale_rtx))
10788 scale = INTVAL (scale_rtx);
10791 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10792 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10794 /* Avoid useless 0 displacement. */
10795 if (disp == const0_rtx && (base || index))
10798 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10799 if (base_reg && index_reg && scale == 1
10800 && (index_reg == arg_pointer_rtx
10801 || index_reg == frame_pointer_rtx
10802 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10805 tmp = base, base = index, index = tmp;
10806 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10809 /* Special case: %ebp cannot be encoded as a base without a displacement.
10813 && (base_reg == hard_frame_pointer_rtx
10814 || base_reg == frame_pointer_rtx
10815 || base_reg == arg_pointer_rtx
10816 || (REG_P (base_reg)
10817 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10818 || REGNO (base_reg) == R13_REG))))
10821 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
10822 Avoid this by transforming to [%esi+0].
10823 Reload calls address legitimization without cfun defined, so we need
10824 to test cfun for being non-NULL. */
10825 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10826 && base_reg && !index_reg && !disp
10827 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10830 /* Special case: encode reg+reg instead of reg*2. */
10831 if (!base && index && scale == 2)
10832 base = index, base_reg = index_reg, scale = 1;
10834 /* Special case: scaling cannot be encoded without base or displacement. */
10835 if (!base && !disp && index && scale != 1)
10839 out->index = index;
10841 out->scale = scale;
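/* For illustration only: given the canonical address

	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   i.e. 12(%eax,%ebx,4) with A in %eax and B in %ebx, the function
   fills OUT with base = A, index = B, scale = 4, disp = 12 and
   returns nonzero.  */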
10847 /* Return the cost of the memory address X.
10848 For i386, it is better to use a complex address than let gcc copy
10849 the address into a reg and make a new pseudo. But not if the address
10850 requires two regs - that would mean more pseudos with longer lifetimes. */
10853 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10855 struct ix86_address parts;
10857 int ok = ix86_decompose_address (x, &parts);
10861 if (parts.base && GET_CODE (parts.base) == SUBREG)
10862 parts.base = SUBREG_REG (parts.base);
10863 if (parts.index && GET_CODE (parts.index) == SUBREG)
10864 parts.index = SUBREG_REG (parts.index);
10866 /* Attempt to minimize number of registers in the address. */
10868 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10870 && (!REG_P (parts.index)
10871 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10875 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10877 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10878 && parts.base != parts.index)
10881 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10882 since its predecode logic can't detect the length of instructions
10883 and they degenerate to vector decoding. Increase the cost of such
10884 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10885 to split such addresses or even refuse them entirely.
10887 The following addressing modes are affected:
10892 The first and last cases may be avoidable by explicitly coding the zero
10893 into the memory address, but I don't have an AMD-K6 machine handy to check this theory. */
10897 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10898 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10899 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10905 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10906 this is used to form addresses to local data when -fPIC is in effect. */
10910 darwin_local_data_pic (rtx disp)
10912 return (GET_CODE (disp) == UNSPEC
10913 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10916 /* Determine if a given RTX is a valid constant. We already know this
10917 satisfies CONSTANT_P. */
10920 legitimate_constant_p (rtx x)
10922 switch (GET_CODE (x))
10927 if (GET_CODE (x) == PLUS)
10929 if (!CONST_INT_P (XEXP (x, 1)))
10934 if (TARGET_MACHO && darwin_local_data_pic (x))
10937 /* Only some unspecs are valid as "constants". */
10938 if (GET_CODE (x) == UNSPEC)
10939 switch (XINT (x, 1))
10942 case UNSPEC_GOTOFF:
10943 case UNSPEC_PLTOFF:
10944 return TARGET_64BIT;
10946 case UNSPEC_NTPOFF:
10947 x = XVECEXP (x, 0, 0);
10948 return (GET_CODE (x) == SYMBOL_REF
10949 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10950 case UNSPEC_DTPOFF:
10951 x = XVECEXP (x, 0, 0);
10952 return (GET_CODE (x) == SYMBOL_REF
10953 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10958 /* We must have drilled down to a symbol. */
10959 if (GET_CODE (x) == LABEL_REF)
10961 if (GET_CODE (x) != SYMBOL_REF)
10966 /* TLS symbols are never valid. */
10967 if (SYMBOL_REF_TLS_MODEL (x))
10970 /* DLLIMPORT symbols are never valid. */
10971 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10972 && SYMBOL_REF_DLLIMPORT_P (x))
10977 if (GET_MODE (x) == TImode
10978 && x != CONST0_RTX (TImode)
10984 if (!standard_sse_constant_p (x))
10991 /* Otherwise we handle everything else in the move patterns. */
10995 /* Determine if it's legal to put X into the constant pool. This
10996 is not possible for the address of thread-local symbols, which
10997 is checked above. */
11000 ix86_cannot_force_const_mem (rtx x)
11002 /* We can always put integral constants and vectors in memory. */
11003 switch (GET_CODE (x))
11013 return !legitimate_constant_p (x);
11017 /* Nonzero if the constant value X is a legitimate general operand
11018 when generating PIC code. It is given that flag_pic is on and
11019 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11022 legitimate_pic_operand_p (rtx x)
11026 switch (GET_CODE (x))
11029 inner = XEXP (x, 0);
11030 if (GET_CODE (inner) == PLUS
11031 && CONST_INT_P (XEXP (inner, 1)))
11032 inner = XEXP (inner, 0);
11034 /* Only some unspecs are valid as "constants". */
11035 if (GET_CODE (inner) == UNSPEC)
11036 switch (XINT (inner, 1))
11039 case UNSPEC_GOTOFF:
11040 case UNSPEC_PLTOFF:
11041 return TARGET_64BIT;
11043 x = XVECEXP (inner, 0, 0);
11044 return (GET_CODE (x) == SYMBOL_REF
11045 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11046 case UNSPEC_MACHOPIC_OFFSET:
11047 return legitimate_pic_address_disp_p (x);
11055 return legitimate_pic_address_disp_p (x);
11062 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
11066 legitimate_pic_address_disp_p (rtx disp)
11070 /* In 64bit mode we can allow direct addresses of symbols and labels
11071 when they are not dynamic symbols. */
11074 rtx op0 = disp, op1;
11076 switch (GET_CODE (disp))
11082 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11084 op0 = XEXP (XEXP (disp, 0), 0);
11085 op1 = XEXP (XEXP (disp, 0), 1);
11086 if (!CONST_INT_P (op1)
11087 || INTVAL (op1) >= 16*1024*1024
11088 || INTVAL (op1) < -16*1024*1024)
11090 if (GET_CODE (op0) == LABEL_REF)
11092 if (GET_CODE (op0) != SYMBOL_REF)
11097 /* TLS references should always be enclosed in UNSPEC. */
11098 if (SYMBOL_REF_TLS_MODEL (op0))
11100 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11101 && ix86_cmodel != CM_LARGE_PIC)
11109 if (GET_CODE (disp) != CONST)
11111 disp = XEXP (disp, 0);
11115 /* It is unsafe to allow PLUS expressions here; they would permit
11116 addressing at an offset from GOT table entries, which we should not need anyway. */
11117 if (GET_CODE (disp) != UNSPEC
11118 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11119 && XINT (disp, 1) != UNSPEC_GOTOFF
11120 && XINT (disp, 1) != UNSPEC_PLTOFF))
11123 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11124 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11130 if (GET_CODE (disp) == PLUS)
11132 if (!CONST_INT_P (XEXP (disp, 1)))
11134 disp = XEXP (disp, 0);
11138 if (TARGET_MACHO && darwin_local_data_pic (disp))
11141 if (GET_CODE (disp) != UNSPEC)
11144 switch (XINT (disp, 1))
11149 /* We need to check for both symbols and labels because VxWorks loads
11150 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
11152 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11153 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11154 case UNSPEC_GOTOFF:
11155 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11156 The ABI also specifies a 32bit relocation, but we don't produce it in
11157 the small PIC model at all. */
11158 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11159 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11161 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11163 case UNSPEC_GOTTPOFF:
11164 case UNSPEC_GOTNTPOFF:
11165 case UNSPEC_INDNTPOFF:
11168 disp = XVECEXP (disp, 0, 0);
11169 return (GET_CODE (disp) == SYMBOL_REF
11170 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11171 case UNSPEC_NTPOFF:
11172 disp = XVECEXP (disp, 0, 0);
11173 return (GET_CODE (disp) == SYMBOL_REF
11174 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11175 case UNSPEC_DTPOFF:
11176 disp = XVECEXP (disp, 0, 0);
11177 return (GET_CODE (disp) == SYMBOL_REF
11178 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11184 /* Recognizes RTL expressions that are valid memory addresses for an
11185 instruction. The MODE argument is the machine mode for the MEM
11186 expression that wants to use this address.
11188 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11189 convert common non-canonical forms to canonical form so that they will be recognized. */
11193 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11194 rtx addr, bool strict)
11196 struct ix86_address parts;
11197 rtx base, index, disp;
11198 HOST_WIDE_INT scale;
11200 if (ix86_decompose_address (addr, &parts) <= 0)
11201 /* Decomposition failed. */
11205 index = parts.index;
11207 scale = parts.scale;
11209 /* Validate base register.
11211 Don't allow SUBREGs that span more than a word here; they can lead to spill
11212 failures when the base is one word out of a two word structure, which is
11213 represented internally as a DImode int. */
11221 else if (GET_CODE (base) == SUBREG
11222 && REG_P (SUBREG_REG (base))
11223 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11225 reg = SUBREG_REG (base);
11227 /* Base is not a register. */
11230 if (GET_MODE (base) != Pmode)
11231 /* Base is not in Pmode. */
11234 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11235 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11236 /* Base is not valid. */
11240 /* Validate index register.
11242 Don't allow SUBREGs that span more than a word here -- same as above. */
11250 else if (GET_CODE (index) == SUBREG
11251 && REG_P (SUBREG_REG (index))
11252 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11254 reg = SUBREG_REG (index);
11256 /* Index is not a register. */
11259 if (GET_MODE (index) != Pmode)
11260 /* Index is not in Pmode. */
11263 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11264 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11265 /* Index is not valid. */
11269 /* Validate scale factor. */
11273 /* Scale without index. */
11276 if (scale != 2 && scale != 4 && scale != 8)
11277 /* Scale is not a valid multiplier. */
11281 /* Validate displacement. */
11284 if (GET_CODE (disp) == CONST
11285 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11286 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11287 switch (XINT (XEXP (disp, 0), 1))
11289 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
11290 used. The ABI also specifies 32bit relocations, but we don't produce
11291 them at all and use IP-relative addressing instead. */
11293 case UNSPEC_GOTOFF:
11294 gcc_assert (flag_pic);
11296 goto is_legitimate_pic;
11298 /* 64bit address unspec. */
11301 case UNSPEC_GOTPCREL:
11302 gcc_assert (flag_pic);
11303 goto is_legitimate_pic;
11305 case UNSPEC_GOTTPOFF:
11306 case UNSPEC_GOTNTPOFF:
11307 case UNSPEC_INDNTPOFF:
11308 case UNSPEC_NTPOFF:
11309 case UNSPEC_DTPOFF:
11312 case UNSPEC_STACK_CHECK:
11313 gcc_assert (flag_split_stack);
11317 /* Invalid address unspec. */
11321 else if (SYMBOLIC_CONST (disp)
11325 && MACHOPIC_INDIRECT
11326 && !machopic_operand_p (disp)
11332 if (TARGET_64BIT && (index || base))
11334 /* foo@dtpoff(%rX) is ok. */
11335 if (GET_CODE (disp) != CONST
11336 || GET_CODE (XEXP (disp, 0)) != PLUS
11337 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11338 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11339 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11340 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11341 /* Non-constant pic memory reference. */
11344 else if (! legitimate_pic_address_disp_p (disp))
11345 /* Displacement is an invalid pic construct. */
11348 /* This code used to verify that a symbolic pic displacement
11349 includes the pic_offset_table_rtx register.
11351 While this is a good idea, unfortunately these constructs may
11352 be created by the "adds using lea" optimization for incorrect
11361 Such code is nonsensical, but it results in addressing the
11362 GOT table with a pic_offset_table_rtx base. We can't
11363 just refuse it easily, since it gets matched by the
11364 "addsi3" pattern, which later gets split to lea in the
11365 case the output register differs from the input. While this
11366 could be handled by a separate addsi pattern for this case,
11367 one that never results in lea, disabling this test seems to be
11368 the easier and correct fix for the crash. */
11370 else if (GET_CODE (disp) != LABEL_REF
11371 && !CONST_INT_P (disp)
11372 && (GET_CODE (disp) != CONST
11373 || !legitimate_constant_p (disp))
11374 && (GET_CODE (disp) != SYMBOL_REF
11375 || !legitimate_constant_p (disp)))
11376 /* Displacement is not constant. */
11378 else if (TARGET_64BIT
11379 && !x86_64_immediate_operand (disp, VOIDmode))
11380 /* Displacement is out of range. */
11384 /* Everything looks valid. */
11388 /* Determine if a given RTX is a valid constant address. */
11391 constant_address_p (rtx x)
11393 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11396 /* Return a unique alias set for the GOT. */
11398 static alias_set_type
11399 ix86_GOT_alias_set (void)
11401 static alias_set_type set = -1;
11403 set = new_alias_set ();
11407 /* Return a legitimate reference for ORIG (an address) using the
11408 register REG. If REG is 0, a new pseudo is generated.
11410 There are two types of references that must be handled:
11412 1. Global data references must load the address from the GOT, via
11413 the PIC reg. An insn is emitted to do this load, and the reg is
11416 2. Static data references, constant pool addresses, and code labels
11417 compute the address as an offset from the GOT, whose base is in
11418 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11419 differentiate them from global data objects. The returned
11420 address is the PIC reg + an unspec constant.
11422 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11423 reg also appears in the address. */
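/* For illustration only, with hypothetical symbols (32-bit spellings):

	movl	foo@GOT(%ebx), %eax	/* case 1: address loaded from GOT */
	leal	bar@GOTOFF(%ebx), %eax	/* case 2: PIC reg + local offset */
   */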
11426 legitimize_pic_address (rtx orig, rtx reg)
11429 rtx new_rtx = orig;
11433 if (TARGET_MACHO && !TARGET_64BIT)
11436 reg = gen_reg_rtx (Pmode);
11437 /* Use the generic Mach-O PIC machinery. */
11438 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11442 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11444 else if (TARGET_64BIT
11445 && ix86_cmodel != CM_SMALL_PIC
11446 && gotoff_operand (addr, Pmode))
11449 /* This symbol may be referenced via a displacement from the PIC
11450 base address (@GOTOFF). */
11452 if (reload_in_progress)
11453 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11454 if (GET_CODE (addr) == CONST)
11455 addr = XEXP (addr, 0);
11456 if (GET_CODE (addr) == PLUS)
11458 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11460 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11463 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11464 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11466 tmpreg = gen_reg_rtx (Pmode);
11469 emit_move_insn (tmpreg, new_rtx);
11473 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11474 tmpreg, 1, OPTAB_DIRECT);
11477 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11479 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11481 /* This symbol may be referenced via a displacement from the PIC
11482 base address (@GOTOFF). */
11484 if (reload_in_progress)
11485 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11486 if (GET_CODE (addr) == CONST)
11487 addr = XEXP (addr, 0);
11488 if (GET_CODE (addr) == PLUS)
11490 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11492 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11495 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11496 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11497 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11501 emit_move_insn (reg, new_rtx);
11505 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11506 /* We can't use @GOTOFF for text labels on VxWorks;
11507 see gotoff_operand. */
11508 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11510 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11512 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11513 return legitimize_dllimport_symbol (addr, true);
11514 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11515 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11516 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11518 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11519 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11523 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11525 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11526 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11527 new_rtx = gen_const_mem (Pmode, new_rtx);
11528 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11531 reg = gen_reg_rtx (Pmode);
11532 /* Use gen_movsi directly; otherwise the address is loaded
11533 into a register for CSE. We don't want to CSE these addresses;
11534 instead we CSE addresses from the GOT table, so skip this. */
11535 emit_insn (gen_movsi (reg, new_rtx));
11540 /* This symbol must be referenced via a load from the
11541 Global Offset Table (@GOT). */
11543 if (reload_in_progress)
11544 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11545 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11546 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11548 new_rtx = force_reg (Pmode, new_rtx);
11549 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11550 new_rtx = gen_const_mem (Pmode, new_rtx);
11551 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11554 reg = gen_reg_rtx (Pmode);
11555 emit_move_insn (reg, new_rtx);
11561 if (CONST_INT_P (addr)
11562 && !x86_64_immediate_operand (addr, VOIDmode))
11566 emit_move_insn (reg, addr);
11570 new_rtx = force_reg (Pmode, addr);
11572 else if (GET_CODE (addr) == CONST)
11574 addr = XEXP (addr, 0);
11576 /* We must match stuff we generate before. Assume the only
11577 unspecs that can get here are ours. Not that we could do
11578 anything with them anyway.... */
11579 if (GET_CODE (addr) == UNSPEC
11580 || (GET_CODE (addr) == PLUS
11581 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11583 gcc_assert (GET_CODE (addr) == PLUS);
11585 if (GET_CODE (addr) == PLUS)
11587 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11589 /* Check first to see if this is a constant offset from a @GOTOFF
11590 symbol reference. */
11591 if (gotoff_operand (op0, Pmode)
11592 && CONST_INT_P (op1))
11596 if (reload_in_progress)
11597 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11598 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11600 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11601 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11602 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11606 emit_move_insn (reg, new_rtx);
11612 if (INTVAL (op1) < -16*1024*1024
11613 || INTVAL (op1) >= 16*1024*1024)
11615 if (!x86_64_immediate_operand (op1, Pmode))
11616 op1 = force_reg (Pmode, op1);
11617 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11623 base = legitimize_pic_address (XEXP (addr, 0), reg);
11624 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11625 base == reg ? NULL_RTX : reg);
11627 if (CONST_INT_P (new_rtx))
11628 new_rtx = plus_constant (base, INTVAL (new_rtx));
11631 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11633 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11634 new_rtx = XEXP (new_rtx, 1);
11636 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11644 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11647 get_thread_pointer (int to_reg)
11651 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11655 reg = gen_reg_rtx (Pmode);
11656 insn = gen_rtx_SET (VOIDmode, reg, tp);
11657 insn = emit_insn (insn);
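/* For illustration only: UNSPEC_TP typically prints as a segment
   reference -- %gs:0 on 32-bit GNU/Linux, %fs:0 in 64-bit mode -- so
   forcing the thread pointer into a register is a single

	movl	%gs:0, %eax

   (hypothetical destination register).  */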
11662 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11663 false if we expect this to be used for a memory address and true if
11664 we expect to load the address into a register. */
11667 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11669 rtx dest, base, off, pic, tp;
11674 case TLS_MODEL_GLOBAL_DYNAMIC:
11675 dest = gen_reg_rtx (Pmode);
11676 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11678 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11680 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11683 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11684 insns = get_insns ();
11687 RTL_CONST_CALL_P (insns) = 1;
11688 emit_libcall_block (insns, dest, rax, x);
11690 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11691 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11693 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11695 if (TARGET_GNU2_TLS)
11697 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11699 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11703 case TLS_MODEL_LOCAL_DYNAMIC:
11704 base = gen_reg_rtx (Pmode);
11705 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11707 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11709 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11712 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11713 insns = get_insns ();
11716 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11717 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11718 RTL_CONST_CALL_P (insns) = 1;
11719 emit_libcall_block (insns, base, rax, note);
11721 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11722 emit_insn (gen_tls_local_dynamic_base_64 (base));
11724 emit_insn (gen_tls_local_dynamic_base_32 (base));
11726 if (TARGET_GNU2_TLS)
11728 rtx x = ix86_tls_module_base ();
11730 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11731 gen_rtx_MINUS (Pmode, x, tp));
11734 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11735 off = gen_rtx_CONST (Pmode, off);
11737 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11739 if (TARGET_GNU2_TLS)
11741 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11743 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11748 case TLS_MODEL_INITIAL_EXEC:
11752 type = UNSPEC_GOTNTPOFF;
11756 if (reload_in_progress)
11757 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11758 pic = pic_offset_table_rtx;
11759 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11761 else if (!TARGET_ANY_GNU_TLS)
11763 pic = gen_reg_rtx (Pmode);
11764 emit_insn (gen_set_got (pic));
11765 type = UNSPEC_GOTTPOFF;
11770 type = UNSPEC_INDNTPOFF;
11773 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11774 off = gen_rtx_CONST (Pmode, off);
11776 off = gen_rtx_PLUS (Pmode, pic, off);
11777 off = gen_const_mem (Pmode, off);
11778 set_mem_alias_set (off, ix86_GOT_alias_set ());
11780 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11782 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11783 off = force_reg (Pmode, off);
11784 return gen_rtx_PLUS (Pmode, base, off);
11788 base = get_thread_pointer (true);
11789 dest = gen_reg_rtx (Pmode);
11790 emit_insn (gen_subsi3 (dest, base, off));
11794 case TLS_MODEL_LOCAL_EXEC:
11795 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11796 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11797 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11798 off = gen_rtx_CONST (Pmode, off);
11800 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11802 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11803 return gen_rtx_PLUS (Pmode, base, off);
11807 base = get_thread_pointer (true);
11808 dest = gen_reg_rtx (Pmode);
11809 emit_insn (gen_subsi3 (dest, base, off));
11814 gcc_unreachable ();
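/* For illustration only: the sequences built above correspond to the
   usual TLS access models.  Typical 32-bit GNU spellings, for a
   hypothetical symbol x:

   global dynamic:  leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
   local dynamic:   leal x@tlsldm(%ebx), %eax; call ___tls_get_addr
		    leal x@dtpoff(%eax), %edx
   initial exec:    movl x@gotntpoff(%ebx), %ecx; movl %gs:(%ecx), %eax
   local exec:      movl %gs:x@ntpoff, %eax
   */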
11820 /* Create or return the unique __imp_DECL dllimport symbol corresponding to DECL. */
11823 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11824 htab_t dllimport_map;
11827 get_dllimport_decl (tree decl)
11829 struct tree_map *h, in;
11832 const char *prefix;
11833 size_t namelen, prefixlen;
11838 if (!dllimport_map)
11839 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11841 in.hash = htab_hash_pointer (decl);
11842 in.base.from = decl;
11843 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11844 h = (struct tree_map *) *loc;
11848 *loc = h = ggc_alloc_tree_map ();
11850 h->base.from = decl;
11851 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11852 VAR_DECL, NULL, ptr_type_node);
11853 DECL_ARTIFICIAL (to) = 1;
11854 DECL_IGNORED_P (to) = 1;
11855 DECL_EXTERNAL (to) = 1;
11856 TREE_READONLY (to) = 1;
11858 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11859 name = targetm.strip_name_encoding (name);
11860 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11861 ? "*__imp_" : "*__imp__";
11862 namelen = strlen (name);
11863 prefixlen = strlen (prefix);
11864 imp_name = (char *) alloca (namelen + prefixlen + 1);
11865 memcpy (imp_name, prefix, prefixlen);
11866 memcpy (imp_name + prefixlen, name, namelen + 1);
11868 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11869 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11870 SET_SYMBOL_REF_DECL (rtl, to);
11871 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11873 rtl = gen_const_mem (Pmode, rtl);
11874 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11876 SET_DECL_RTL (to, rtl);
11877 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11882 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11883 true if we require the result to be in a register. */
11886 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11891 gcc_assert (SYMBOL_REF_DECL (symbol));
11892 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11894 x = DECL_RTL (imp_decl);
11896 x = force_reg (Pmode, x);
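/* For illustration only: after this transformation a dllimport'd
   symbol "foo" is reached through the import-table slot the linker
   fills in, e.g. (32-bit, hypothetical destination register)

	movl	__imp__foo, %eax

   matching the "*__imp_" / "*__imp__" prefixes chosen above.  */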
11900 /* Try machine-dependent ways of modifying an illegitimate address
11901 to be legitimate. If we find one, return the new, valid address.
11902 This macro is used in only one place: `memory_address' in explow.c.
11904 OLDX is the address as it was before break_out_memory_refs was called.
11905 In some cases it is useful to look at this to decide what needs to be done.
11907 It is always safe for this macro to do nothing. It exists to recognize
11908 opportunities to optimize the output.
11910 For the 80386, we handle X+REG by loading X into a register R and
11911 using R+REG. R will go in a general reg and indexing will be used.
11912 However, if REG is a broken-out memory address or multiplication,
11913 nothing needs to be done because REG can certainly go in a general reg.
11915 When -fpic is used, special handling is needed for symbolic references.
11916 See comments by legitimize_pic_address in i386.c for details. */
11919 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11920 enum machine_mode mode)
11925 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11927 return legitimize_tls_address (x, (enum tls_model) log, false);
11928 if (GET_CODE (x) == CONST
11929 && GET_CODE (XEXP (x, 0)) == PLUS
11930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11931 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11933 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11934 (enum tls_model) log, false);
11935 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11938 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11940 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11941 return legitimize_dllimport_symbol (x, true);
11942 if (GET_CODE (x) == CONST
11943 && GET_CODE (XEXP (x, 0)) == PLUS
11944 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11945 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11947 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11948 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11952 if (flag_pic && SYMBOLIC_CONST (x))
11953 return legitimize_pic_address (x, 0);
11955 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11956 if (GET_CODE (x) == ASHIFT
11957 && CONST_INT_P (XEXP (x, 1))
11958 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11961 log = INTVAL (XEXP (x, 1));
11962 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11963 GEN_INT (1 << log));
11966 if (GET_CODE (x) == PLUS)
11968 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11970 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11971 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11972 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11975 log = INTVAL (XEXP (XEXP (x, 0), 1));
11976 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11977 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11978 GEN_INT (1 << log));
11981 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11982 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11983 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11986 log = INTVAL (XEXP (XEXP (x, 1), 1));
11987 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11988 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11989 GEN_INT (1 << log));
11992 /* Put multiply first if it isn't already. */
11993 if (GET_CODE (XEXP (x, 1)) == MULT)
11995 rtx tmp = XEXP (x, 0);
11996 XEXP (x, 0) = XEXP (x, 1);
12001 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12002 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12003 created by virtual register instantiation, register elimination, and
12004 similar optimizations. */
12005 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12008 x = gen_rtx_PLUS (Pmode,
12009 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12010 XEXP (XEXP (x, 1), 0)),
12011 XEXP (XEXP (x, 1), 1));
12015 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12016 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12017 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12018 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12019 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12020 && CONSTANT_P (XEXP (x, 1)))
12023 rtx other = NULL_RTX;
12025 if (CONST_INT_P (XEXP (x, 1)))
12027 constant = XEXP (x, 1);
12028 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12030 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12032 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12033 other = XEXP (x, 1);
12041 x = gen_rtx_PLUS (Pmode,
12042 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12043 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12044 plus_constant (other, INTVAL (constant)));
12048 if (changed && ix86_legitimate_address_p (mode, x, false))
12051 if (GET_CODE (XEXP (x, 0)) == MULT)
12054 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12057 if (GET_CODE (XEXP (x, 1)) == MULT)
12060 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12064 && REG_P (XEXP (x, 1))
12065 && REG_P (XEXP (x, 0)))
12068 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12071 x = legitimize_pic_address (x, 0);
12074 if (changed && ix86_legitimate_address_p (mode, x, false))
12077 if (REG_P (XEXP (x, 0)))
12079 rtx temp = gen_reg_rtx (Pmode);
12080 rtx val = force_operand (XEXP (x, 1), temp);
12082 emit_move_insn (temp, val);
12084 XEXP (x, 1) = temp;
12088 else if (REG_P (XEXP (x, 1)))
12090 rtx temp = gen_reg_rtx (Pmode);
12091 rtx val = force_operand (XEXP (x, 0), temp);
12093 emit_move_insn (temp, val);
12095 XEXP (x, 0) = temp;
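/* For illustration only: the ASHIFT canonicalization above rewrites

	(plus (ashift (reg A) (const_int 2)) (reg B))
   into
	(plus (mult (reg A) (const_int 4)) (reg B))

   which ix86_decompose_address accepts as B + A*4, i.e. a single lea.  */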
12103 /* Print an integer constant expression in assembler syntax. Addition
12104 and subtraction are the only arithmetic that may appear in these
12105 expressions. FILE is the stdio stream to write to, X is the rtx, and
12106 CODE is the operand print code from the output string. */
12109 output_pic_addr_const (FILE *file, rtx x, int code)
12113 switch (GET_CODE (x))
12116 gcc_assert (flag_pic);
12121 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12122 output_addr_const (file, x);
12125 const char *name = XSTR (x, 0);
12127 /* Mark the decl as referenced so that cgraph will
12128 output the function. */
12129 if (SYMBOL_REF_DECL (x))
12130 mark_decl_referenced (SYMBOL_REF_DECL (x));
12133 if (MACHOPIC_INDIRECT
12134 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12135 name = machopic_indirection_name (x, /*stub_p=*/true);
12137 assemble_name (file, name);
12139 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12140 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12141 fputs ("@PLT", file);
12148 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12149 assemble_name (asm_out_file, buf);
12153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12157 /* This used to output parentheses around the expression,
12158 but that does not work on the 386 (either ATT or BSD assembler). */
12159 output_pic_addr_const (file, XEXP (x, 0), code);
12163 if (GET_MODE (x) == VOIDmode)
12165 /* We can use %d if the number is <32 bits and positive. */
12166 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12167 fprintf (file, "0x%lx%08lx",
12168 (unsigned long) CONST_DOUBLE_HIGH (x),
12169 (unsigned long) CONST_DOUBLE_LOW (x));
12171 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12174 /* We can't handle floating point constants;
12175 TARGET_PRINT_OPERAND must handle them. */
12176 output_operand_lossage ("floating constant misused");
12180 /* Some assemblers need integer constants to appear first. */
12181 if (CONST_INT_P (XEXP (x, 0)))
12183 output_pic_addr_const (file, XEXP (x, 0), code);
12185 output_pic_addr_const (file, XEXP (x, 1), code);
12189 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12190 output_pic_addr_const (file, XEXP (x, 1), code);
12192 output_pic_addr_const (file, XEXP (x, 0), code);
12198 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12199 output_pic_addr_const (file, XEXP (x, 0), code);
12201 output_pic_addr_const (file, XEXP (x, 1), code);
12203 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12207 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12209 bool f = i386_asm_output_addr_const_extra (file, x);
12214 gcc_assert (XVECLEN (x, 0) == 1);
12215 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12216 switch (XINT (x, 1))
12219 fputs ("@GOT", file);
12221 case UNSPEC_GOTOFF:
12222 fputs ("@GOTOFF", file);
12224 case UNSPEC_PLTOFF:
12225 fputs ("@PLTOFF", file);
12227 case UNSPEC_GOTPCREL:
12228 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12229 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12231 case UNSPEC_GOTTPOFF:
12232 /* FIXME: This might be @TPOFF in Sun ld too. */
12233 fputs ("@gottpoff", file);
12236 fputs ("@tpoff", file);
12238 case UNSPEC_NTPOFF:
12240 fputs ("@tpoff", file);
12242 fputs ("@ntpoff", file);
12244 case UNSPEC_DTPOFF:
12245 fputs ("@dtpoff", file);
12247 case UNSPEC_GOTNTPOFF:
12249 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12250 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12252 fputs ("@gotntpoff", file);
12254 case UNSPEC_INDNTPOFF:
12255 fputs ("@indntpoff", file);
12258 case UNSPEC_MACHOPIC_OFFSET:
12260 machopic_output_function_base_name (file);
12264 output_operand_lossage ("invalid UNSPEC as operand");
12270 output_operand_lossage ("invalid expression as operand");
12274 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12275 We need to emit DTP-relative relocations. */
12277 static void ATTRIBUTE_UNUSED
12278 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12280 fputs (ASM_LONG, file);
12281 output_addr_const (file, x);
12282 fputs ("@dtpoff", file);
12288 fputs (", 0", file);
12291 gcc_unreachable ();
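/* For illustration only: for a 4-byte DTP-relative reference to a
   hypothetical symbol x this emits

	.long	x@dtpoff

   and the 8-byte case on a 32-bit host appends ", 0" for the upper
   half.  */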
12295 /* Return true if X is a representation of the PIC register. This copes
12296 with calls from ix86_find_base_term, where the register might have
12297 been replaced by a cselib value. */
12300 ix86_pic_register_p (rtx x)
12302 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12303 return (pic_offset_table_rtx
12304 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12306 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12309 /* Helper function for ix86_delegitimize_address.
12310 Attempt to delegitimize TLS local-exec accesses. */
12313 ix86_delegitimize_tls_address (rtx orig_x)
12315 rtx x = orig_x, unspec;
12316 struct ix86_address addr;
12318 if (!TARGET_TLS_DIRECT_SEG_REFS)
12322 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12324 if (ix86_decompose_address (x, &addr) == 0
12325 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12326 || addr.disp == NULL_RTX
12327 || GET_CODE (addr.disp) != CONST)
12329 unspec = XEXP (addr.disp, 0);
12330 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12331 unspec = XEXP (unspec, 0);
12332 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12334 x = XVECEXP (unspec, 0, 0);
12335 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12336 if (unspec != XEXP (addr.disp, 0))
12337 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12340 rtx idx = addr.index;
12341 if (addr.scale != 1)
12342 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12343 x = gen_rtx_PLUS (Pmode, idx, x);
12346 x = gen_rtx_PLUS (Pmode, addr.base, x);
12347 if (MEM_P (orig_x))
12348 x = replace_equiv_address_nv (orig_x, x);
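/* Illustrative example (editor's sketch): a local-exec TLS access such as
       movl %gs:x@ntpoff, %eax
   arrives here with a displacement of the form
       (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF))
   and, assuming the decomposition above succeeds, is rewritten back to a
   plain reference to (symbol_ref "x") plus any base/index terms, which is
   what the debug output wants to see.  */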
12352 /* In the name of slightly smaller debug output, and to cater to
12353 general assembler lossage, recognize PIC+GOTOFF and turn it back
12354 into a direct symbol reference.
12356 On Darwin, this is necessary to avoid a crash, because Darwin
12357 has a different PIC label for each routine but the DWARF debugging
12358 information is not associated with any particular routine, so it's
12359 necessary to remove references to the PIC label from RTL stored by
12360 the DWARF output code. */
12363 ix86_delegitimize_address (rtx x)
12365 rtx orig_x = delegitimize_mem_from_attrs (x);
12366 /* addend is NULL or some rtx if x is something+GOTOFF where
12367 something doesn't include the PIC register. */
12368 rtx addend = NULL_RTX;
12369 /* reg_addend is NULL or a multiple of some register. */
12370 rtx reg_addend = NULL_RTX;
12371 /* const_addend is NULL or a const_int. */
12372 rtx const_addend = NULL_RTX;
12373 /* This is the result, or NULL. */
12374 rtx result = NULL_RTX;
12383 if (GET_CODE (x) != CONST
12384 || GET_CODE (XEXP (x, 0)) != UNSPEC
12385 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12386 || !MEM_P (orig_x))
12387 return ix86_delegitimize_tls_address (orig_x);
12388 x = XVECEXP (XEXP (x, 0), 0, 0);
12389 if (GET_MODE (orig_x) != Pmode)
12390 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12394 if (GET_CODE (x) != PLUS
12395 || GET_CODE (XEXP (x, 1)) != CONST)
12396 return ix86_delegitimize_tls_address (orig_x);
12398 if (ix86_pic_register_p (XEXP (x, 0)))
12399 /* %ebx + GOT/GOTOFF */
12401 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12403 /* %ebx + %reg * scale + GOT/GOTOFF */
12404 reg_addend = XEXP (x, 0);
12405 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12406 reg_addend = XEXP (reg_addend, 1);
12407 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12408 reg_addend = XEXP (reg_addend, 0);
12411 reg_addend = NULL_RTX;
12412 addend = XEXP (x, 0);
12416 addend = XEXP (x, 0);
12418 x = XEXP (XEXP (x, 1), 0);
12419 if (GET_CODE (x) == PLUS
12420 && CONST_INT_P (XEXP (x, 1)))
12422 const_addend = XEXP (x, 1);
12426 if (GET_CODE (x) == UNSPEC
12427 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12428 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12429 result = XVECEXP (x, 0, 0);
12431 if (TARGET_MACHO && darwin_local_data_pic (x)
12432 && !MEM_P (orig_x))
12433 result = XVECEXP (x, 0, 0);
12436 return ix86_delegitimize_tls_address (orig_x);
12439 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12441 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12444 /* If the rest of original X doesn't involve the PIC register, add
12445 addend and subtract pic_offset_table_rtx. This can happen e.g.
12447 leal (%ebx, %ecx, 4), %ecx
12449 movl foo@GOTOFF(%ecx), %edx
12450 in which case we return (%ecx - %ebx) + foo. */
12451 if (pic_offset_table_rtx)
12452 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12453 pic_offset_table_rtx),
12458 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12459 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12463 /* If X is a machine specific address (i.e. a symbol or label being
12464 referenced as a displacement from the GOT implemented using an
12465 UNSPEC), then return the base term. Otherwise return X. */
12468 ix86_find_base_term (rtx x)
12474 if (GET_CODE (x) != CONST)
12476 term = XEXP (x, 0);
12477 if (GET_CODE (term) == PLUS
12478 && (CONST_INT_P (XEXP (term, 1))
12479 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12480 term = XEXP (term, 0);
12481 if (GET_CODE (term) != UNSPEC
12482 || XINT (term, 1) != UNSPEC_GOTPCREL)
12485 return XVECEXP (term, 0, 0);
12488 return ix86_delegitimize_address (x);
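/* Illustrative example (editor's): in 64-bit PIC code a GOT reference has
   an address of the form
       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL))
   possibly with a CONST_INT added on top; the routine above strips that
   wrapping and returns the symbol_ref itself as the base term for alias
   analysis.  */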
12492 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12493 int fp, FILE *file)
12495 const char *suffix;
12497 if (mode == CCFPmode || mode == CCFPUmode)
12499 code = ix86_fp_compare_code_to_integer (code);
12503 code = reverse_condition (code);
12554 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12558 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12559 Those same assemblers have the same but opposite lossage on cmov. */
12560 if (mode == CCmode)
12561 suffix = fp ? "nbe" : "a";
12562 else if (mode == CCCmode)
12565 gcc_unreachable ();
12581 gcc_unreachable ();
12585 gcc_assert (mode == CCmode || mode == CCCmode);
12602 gcc_unreachable ();
12606 /* ??? As above. */
12607 gcc_assert (mode == CCmode || mode == CCCmode);
12608 suffix = fp ? "nb" : "ae";
12611 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12615 /* ??? As above. */
12616 if (mode == CCmode)
12618 else if (mode == CCCmode)
12619 suffix = fp ? "nb" : "ae";
12621 gcc_unreachable ();
12624 suffix = fp ? "u" : "p";
12627 suffix = fp ? "nu" : "np";
12630 gcc_unreachable ();
12632 fputs (suffix, file);
12635 /* Print the name of register X to FILE based on its machine mode and number.
12636 If CODE is 'w', pretend the mode is HImode.
12637 If CODE is 'b', pretend the mode is QImode.
12638 If CODE is 'k', pretend the mode is SImode.
12639 If CODE is 'q', pretend the mode is DImode.
12640 If CODE is 'x', pretend the mode is V4SFmode.
12641 If CODE is 't', pretend the mode is V8SFmode.
12642 If CODE is 'h', pretend the reg is the 'high' byte register.
12643 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12644 If CODE is 'd', duplicate the operand for AVX instruction.
12648 print_reg (rtx x, int code, FILE *file)
12651 bool duplicated = code == 'd' && TARGET_AVX;
12653 gcc_assert (x == pc_rtx
12654 || (REGNO (x) != ARG_POINTER_REGNUM
12655 && REGNO (x) != FRAME_POINTER_REGNUM
12656 && REGNO (x) != FLAGS_REG
12657 && REGNO (x) != FPSR_REG
12658 && REGNO (x) != FPCR_REG));
12660 if (ASSEMBLER_DIALECT == ASM_ATT)
12665 gcc_assert (TARGET_64BIT);
12666 fputs ("rip", file);
12670 if (code == 'w' || MMX_REG_P (x))
12672 else if (code == 'b')
12674 else if (code == 'k')
12676 else if (code == 'q')
12678 else if (code == 'y')
12680 else if (code == 'h')
12682 else if (code == 'x')
12684 else if (code == 't')
12687 code = GET_MODE_SIZE (GET_MODE (x));
12689 /* Irritatingly, AMD extended registers use a different naming convention
12690 from the normal registers.  */
12691 if (REX_INT_REG_P (x))
12693 gcc_assert (TARGET_64BIT);
12697 error ("extended registers have no high halves");
12700 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12703 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12706 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12709 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12712 error ("unsupported operand size for extended register");
12722 if (STACK_TOP_P (x))
12731 if (! ANY_FP_REG_P (x))
12732 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12737 reg = hi_reg_name[REGNO (x)];
12740 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12742 reg = qi_reg_name[REGNO (x)];
12745 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12747 reg = qi_high_reg_name[REGNO (x)];
12752 gcc_assert (!duplicated);
12754 fputs (hi_reg_name[REGNO (x)] + 1, file);
12759 gcc_unreachable ();
12765 if (ASSEMBLER_DIALECT == ASM_ATT)
12766 fprintf (file, ", %%%s", reg);
12768 fprintf (file, ", %s", reg);
12772 /* Locate some local-dynamic symbol still in use by this function
12773 so that we can print its name in some tls_local_dynamic_base pattern.  */
12777 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12781 if (GET_CODE (x) == SYMBOL_REF
12782 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12784 cfun->machine->some_ld_name = XSTR (x, 0);
12791 static const char *
12792 get_some_local_dynamic_name (void)
12796 if (cfun->machine->some_ld_name)
12797 return cfun->machine->some_ld_name;
12799 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12800 if (NONDEBUG_INSN_P (insn)
12801 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12802 return cfun->machine->some_ld_name;
12807 /* Meaning of CODE:
12808 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12809 C -- print opcode suffix for set/cmov insn.
12810 c -- like C, but print reversed condition
12811 F,f -- likewise, but for floating-point.
12812 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12814 R -- print the prefix for register names.
12815 z -- print the opcode suffix for the size of the current operand.
12816 Z -- likewise, with special suffixes for x87 instructions.
12817 * -- print a star (in certain assembler syntax)
12818 A -- print an absolute memory reference.
12819 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12820 s -- print a shift double count, followed by the assembler's argument
12822 b -- print the QImode name of the register for the indicated operand.
12823 %b0 would print %al if operands[0] is reg 0.
12824 w -- likewise, print the HImode name of the register.
12825 k -- likewise, print the SImode name of the register.
12826 q -- likewise, print the DImode name of the register.
12827 x -- likewise, print the V4SFmode name of the register.
12828 t -- likewise, print the V8SFmode name of the register.
12829 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12830 y -- print "st(0)" instead of "st" as a register.
12831 d -- print duplicated register operand for AVX instruction.
12832 D -- print condition for SSE cmp instruction.
12833 P -- if PIC, print an @PLT suffix.
12834 X -- don't print any sort of PIC '@' suffix for a symbol.
12835 & -- print some in-use local-dynamic symbol name.
12836 H -- print a memory address offset by 8; used for sse high-parts
12837 Y -- print condition for XOP pcom* instruction.
12838 + -- print a branch hint as 'cs' or 'ds' prefix
12839 ; -- print a semicolon (after prefixes due to bug in older gas).
12840 @ -- print a segment register of thread base pointer load
12844 ix86_print_operand (FILE *file, rtx x, int code)
12851 if (ASSEMBLER_DIALECT == ASM_ATT)
12857 const char *name = get_some_local_dynamic_name ();
12859 output_operand_lossage ("'%%&' used without any "
12860 "local dynamic TLS references");
12862 assemble_name (file, name);
12867 switch (ASSEMBLER_DIALECT)
12874 /* Intel syntax. For absolute addresses, registers should not
12875 be surrounded by braces. */
12879 ix86_print_operand (file, x, 0);
12886 gcc_unreachable ();
12889 ix86_print_operand (file, x, 0);
12894 if (ASSEMBLER_DIALECT == ASM_ATT)
12899 if (ASSEMBLER_DIALECT == ASM_ATT)
12904 if (ASSEMBLER_DIALECT == ASM_ATT)
12909 if (ASSEMBLER_DIALECT == ASM_ATT)
12914 if (ASSEMBLER_DIALECT == ASM_ATT)
12919 if (ASSEMBLER_DIALECT == ASM_ATT)
12924 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12926 /* Opcodes don't get size suffixes when using the Intel dialect. */
12927 if (ASSEMBLER_DIALECT == ASM_INTEL)
12930 switch (GET_MODE_SIZE (GET_MODE (x)))
12949 output_operand_lossage
12950 ("invalid operand size for operand code '%c'", code);
12955 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12957 (0, "non-integer operand used with operand code '%c'", code);
12961 /* 387 opcodes don't get size suffixes when using the Intel dialect. */
12962 if (ASSEMBLER_DIALECT == ASM_INTEL)
12965 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12967 switch (GET_MODE_SIZE (GET_MODE (x)))
12970 #ifdef HAVE_AS_IX86_FILDS
12980 #ifdef HAVE_AS_IX86_FILDQ
12983 fputs ("ll", file);
12991 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12993 /* 387 opcodes don't get size suffixes
12994 if the operands are registers. */
12995 if (STACK_REG_P (x))
12998 switch (GET_MODE_SIZE (GET_MODE (x)))
13019 output_operand_lossage
13020 ("invalid operand type used with operand code '%c'", code);
13024 output_operand_lossage
13025 ("invalid operand size for operand code '%c'", code);
13042 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13044 ix86_print_operand (file, x, 0);
13045 fputs (", ", file);
13050 /* A little bit of brain damage here: the SSE compare instructions
13051 use completely different names for the comparisons than the
13052 fp conditional moves. */
13055 switch (GET_CODE (x))
13058 fputs ("eq", file);
13061 fputs ("eq_us", file);
13064 fputs ("lt", file);
13067 fputs ("nge", file);
13070 fputs ("le", file);
13073 fputs ("ngt", file);
13076 fputs ("unord", file);
13079 fputs ("neq", file);
13082 fputs ("neq_oq", file);
13085 fputs ("ge", file);
13088 fputs ("nlt", file);
13091 fputs ("gt", file);
13094 fputs ("nle", file);
13097 fputs ("ord", file);
13100 output_operand_lossage ("operand is not a condition code, "
13101 "invalid operand code 'D'");
13107 switch (GET_CODE (x))
13111 fputs ("eq", file);
13115 fputs ("lt", file);
13119 fputs ("le", file);
13122 fputs ("unord", file);
13126 fputs ("neq", file);
13130 fputs ("nlt", file);
13134 fputs ("nle", file);
13137 fputs ("ord", file);
13140 output_operand_lossage ("operand is not a condition code, "
13141 "invalid operand code 'D'");
13147 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13148 if (ASSEMBLER_DIALECT == ASM_ATT)
13150 switch (GET_MODE (x))
13152 case HImode: putc ('w', file); break;
13154 case SFmode: putc ('l', file); break;
13156 case DFmode: putc ('q', file); break;
13157 default: gcc_unreachable ();
13164 if (!COMPARISON_P (x))
13166 output_operand_lossage ("operand is neither a constant nor a "
13167 "condition code, invalid operand code "
13171 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13174 if (!COMPARISON_P (x))
13176 output_operand_lossage ("operand is neither a constant nor a "
13177 "condition code, invalid operand code "
13181 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13182 if (ASSEMBLER_DIALECT == ASM_ATT)
13185 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13188 /* Like above, but reverse condition */
13190 /* Check to see if argument to %c is really a constant
13191 and not a condition code which needs to be reversed. */
13192 if (!COMPARISON_P (x))
13194 output_operand_lossage ("operand is neither a constant nor a "
13195 "condition code, invalid operand "
13199 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13202 if (!COMPARISON_P (x))
13204 output_operand_lossage ("operand is neither a constant nor a "
13205 "condition code, invalid operand "
13209 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13210 if (ASSEMBLER_DIALECT == ASM_ATT)
13213 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13217 /* It doesn't actually matter what mode we use here, as we're
13218 only going to use this for printing. */
13219 x = adjust_address_nv (x, DImode, 8);
13227 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13230 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13233 int pred_val = INTVAL (XEXP (x, 0));
13235 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13236 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13238 int taken = pred_val > REG_BR_PROB_BASE / 2;
13239 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13241 /* Emit hints only in cases where the default branch prediction
13242 heuristics would fail. */
13243 if (taken != cputaken)
13245 /* We use 3e (DS) prefix for taken branches and
13246 2e (CS) prefix for not taken branches. */
13248 fputs ("ds ; ", file);
13250 fputs ("cs ; ", file);
13258 switch (GET_CODE (x))
13261 fputs ("neq", file);
13264 fputs ("eq", file);
13268 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13272 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13276 fputs ("le", file);
13280 fputs ("lt", file);
13283 fputs ("unord", file);
13286 fputs ("ord", file);
13289 fputs ("ueq", file);
13292 fputs ("nlt", file);
13295 fputs ("nle", file);
13298 fputs ("ule", file);
13301 fputs ("ult", file);
13304 fputs ("une", file);
13307 output_operand_lossage ("operand is not a condition code, "
13308 "invalid operand code 'Y'");
13314 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13320 if (ASSEMBLER_DIALECT == ASM_ATT)
13323 /* The kernel uses a different segment register for performance
13324 reasons; a system call then does not have to trash the userspace
13325 segment register, which would be expensive. */
13326 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13327 fputs ("fs", file);
13329 fputs ("gs", file);
13333 output_operand_lossage ("invalid operand code '%c'", code);
13338 print_reg (x, code, file);
13340 else if (MEM_P (x))
13342 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13343 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13344 && GET_MODE (x) != BLKmode)
13347 switch (GET_MODE_SIZE (GET_MODE (x)))
13349 case 1: size = "BYTE"; break;
13350 case 2: size = "WORD"; break;
13351 case 4: size = "DWORD"; break;
13352 case 8: size = "QWORD"; break;
13353 case 12: size = "TBYTE"; break;
13355 if (GET_MODE (x) == XFmode)
13360 case 32: size = "YMMWORD"; break;
13362 gcc_unreachable ();
13365 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13368 else if (code == 'w')
13370 else if (code == 'k')
13373 fputs (size, file);
13374 fputs (" PTR ", file);
13378 /* Avoid (%rip) for call operands. */
13379 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13380 && !CONST_INT_P (x))
13381 output_addr_const (file, x);
13382 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13383 output_operand_lossage ("invalid constraints for operand");
13385 output_address (x);
13388 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13393 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13394 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13396 if (ASSEMBLER_DIALECT == ASM_ATT)
13398 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
13400 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13402 fprintf (file, "0x%08x", (unsigned int) l);
13405 /* These float cases don't actually occur as immediate operands. */
13406 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13410 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13411 fputs (dstr, file);
13414 else if (GET_CODE (x) == CONST_DOUBLE
13415 && GET_MODE (x) == XFmode)
13419 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13420 fputs (dstr, file);
13425 /* We have patterns that allow zero sets of memory, for instance.
13426 In 64-bit mode, we should probably support all 8-byte vectors,
13427 since we can in fact encode that into an immediate. */
13428 if (GET_CODE (x) == CONST_VECTOR)
13430 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13436 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13438 if (ASSEMBLER_DIALECT == ASM_ATT)
13441 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13442 || GET_CODE (x) == LABEL_REF)
13444 if (ASSEMBLER_DIALECT == ASM_ATT)
13447 fputs ("OFFSET FLAT:", file);
13450 if (CONST_INT_P (x))
13451 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13453 output_pic_addr_const (file, x, code);
13455 output_addr_const (file, x);
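/* As an illustration of the '%z' handling above, here is a minimal
   standalone sketch (editor's example, not the GCC code itself) of how
   the AT&T size suffix is chosen from the operand's mode size for
   integer operands: */

#if 0 /* illustrative only */
static char
example_size_suffix (int mode_size)
{
  switch (mode_size)
    {
    case 1: return 'b';   /* QImode: movb */
    case 2: return 'w';   /* HImode: movw */
    case 4: return 'l';   /* SImode: movl */
    case 8: return 'q';   /* DImode: movq, 64-bit only */
    default: return 0;    /* no suffix; the real code reports lossage */
    }
}
#endif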
13460 ix86_print_operand_punct_valid_p (unsigned char code)
13462 return (code == '@' || code == '*' || code == '+'
13463 || code == '&' || code == ';');
13466 /* Print a memory operand whose address is ADDR. */
13469 ix86_print_operand_address (FILE *file, rtx addr)
13471 struct ix86_address parts;
13472 rtx base, index, disp;
13474 int ok = ix86_decompose_address (addr, &parts);
13479 index = parts.index;
13481 scale = parts.scale;
13489 if (ASSEMBLER_DIALECT == ASM_ATT)
13491 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13494 gcc_unreachable ();
13497 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
13498 if (TARGET_64BIT && !base && !index)
13502 if (GET_CODE (disp) == CONST
13503 && GET_CODE (XEXP (disp, 0)) == PLUS
13504 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13505 symbol = XEXP (XEXP (disp, 0), 0);
13507 if (GET_CODE (symbol) == LABEL_REF
13508 || (GET_CODE (symbol) == SYMBOL_REF
13509 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13512 if (!base && !index)
13514 /* A displacement-only address requires special attention. */
13516 if (CONST_INT_P (disp))
13518 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13519 fputs ("ds:", file);
13520 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13523 output_pic_addr_const (file, disp, 0);
13525 output_addr_const (file, disp);
13529 if (ASSEMBLER_DIALECT == ASM_ATT)
13534 output_pic_addr_const (file, disp, 0);
13535 else if (GET_CODE (disp) == LABEL_REF)
13536 output_asm_label (disp);
13538 output_addr_const (file, disp);
13543 print_reg (base, 0, file);
13547 print_reg (index, 0, file);
13549 fprintf (file, ",%d", scale);
13555 rtx offset = NULL_RTX;
13559 /* Pull out the offset of a symbol; print any symbol itself. */
13560 if (GET_CODE (disp) == CONST
13561 && GET_CODE (XEXP (disp, 0)) == PLUS
13562 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13564 offset = XEXP (XEXP (disp, 0), 1);
13565 disp = gen_rtx_CONST (VOIDmode,
13566 XEXP (XEXP (disp, 0), 0));
13570 output_pic_addr_const (file, disp, 0);
13571 else if (GET_CODE (disp) == LABEL_REF)
13572 output_asm_label (disp);
13573 else if (CONST_INT_P (disp))
13576 output_addr_const (file, disp);
13582 print_reg (base, 0, file);
13585 if (INTVAL (offset) >= 0)
13587 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13591 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13598 print_reg (index, 0, file);
13600 fprintf (file, "*%d", scale);
13607 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13610 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13614 if (GET_CODE (x) != UNSPEC)
13617 op = XVECEXP (x, 0, 0);
13618 switch (XINT (x, 1))
13620 case UNSPEC_GOTTPOFF:
13621 output_addr_const (file, op);
13622 /* FIXME: This might be @TPOFF in Sun ld. */
13623 fputs ("@gottpoff", file);
13626 output_addr_const (file, op);
13627 fputs ("@tpoff", file);
13629 case UNSPEC_NTPOFF:
13630 output_addr_const (file, op);
13632 fputs ("@tpoff", file);
13634 fputs ("@ntpoff", file);
13636 case UNSPEC_DTPOFF:
13637 output_addr_const (file, op);
13638 fputs ("@dtpoff", file);
13640 case UNSPEC_GOTNTPOFF:
13641 output_addr_const (file, op);
13643 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13644 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13646 fputs ("@gotntpoff", file);
13648 case UNSPEC_INDNTPOFF:
13649 output_addr_const (file, op);
13650 fputs ("@indntpoff", file);
13653 case UNSPEC_MACHOPIC_OFFSET:
13654 output_addr_const (file, op);
13656 machopic_output_function_base_name (file);
13660 case UNSPEC_STACK_CHECK:
13664 gcc_assert (flag_split_stack);
13666 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13667 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13669 gcc_unreachable ();
13672 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13683 /* Split one or more double-mode RTL references into pairs of half-mode
13684 references. The RTL can be REG, offsettable MEM, integer constant, or
13685 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13686 split and "num" is its length. lo_half and hi_half are output arrays
13687 that parallel "operands". */
13690 split_double_mode (enum machine_mode mode, rtx operands[],
13691 int num, rtx lo_half[], rtx hi_half[])
13693 enum machine_mode half_mode;
13699 half_mode = DImode;
13702 half_mode = SImode;
13705 gcc_unreachable ();
13708 byte = GET_MODE_SIZE (half_mode);
13712 rtx op = operands[num];
13714 /* simplify_subreg refuses to split volatile memory addresses,
13715 but we still have to handle them. */
13718 lo_half[num] = adjust_address (op, half_mode, 0);
13719 hi_half[num] = adjust_address (op, half_mode, byte);
13723 lo_half[num] = simplify_gen_subreg (half_mode, op,
13724 GET_MODE (op) == VOIDmode
13725 ? mode : GET_MODE (op), 0);
13726 hi_half[num] = simplify_gen_subreg (half_mode, op,
13727 GET_MODE (op) == VOIDmode
13728 ? mode : GET_MODE (op), byte);
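/* For the constant case above, the split amounts to taking the low and
   high halves of the value.  A standalone sketch (editor's example,
   assuming the little-endian 64-bit-into-32-bit case): */

#if 0 /* illustrative only */
static void
example_split_di (unsigned long long x,
                  unsigned int *lo_half, unsigned int *hi_half)
{
  *lo_half = (unsigned int) x;          /* bits 0..31  */
  *hi_half = (unsigned int) (x >> 32);  /* bits 32..63 */
}
#endif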
13733 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13734 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13735 is the expression of the binary operation. The output may either be
13736 emitted here, or returned to the caller, like all output_* functions.
13738 There is no guarantee that the operands are the same mode, as they
13739 might be within FLOAT or FLOAT_EXTEND expressions. */
13741 #ifndef SYSV386_COMPAT
13742 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13743 wants to fix the assemblers because that causes incompatibility
13744 with gcc. No-one wants to fix gcc because that causes
13745 incompatibility with assemblers... You can use the option of
13746 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13747 #define SYSV386_COMPAT 1
13751 output_387_binary_op (rtx insn, rtx *operands)
13753 static char buf[40];
13756 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13758 #ifdef ENABLE_CHECKING
13759 /* Even if we do not want to check the inputs, this documents the input
13760 constraints, which helps in understanding the following code. */
13761 if (STACK_REG_P (operands[0])
13762 && ((REG_P (operands[1])
13763 && REGNO (operands[0]) == REGNO (operands[1])
13764 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13765 || (REG_P (operands[2])
13766 && REGNO (operands[0]) == REGNO (operands[2])
13767 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13768 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13771 gcc_assert (is_sse);
13774 switch (GET_CODE (operands[3]))
13777 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13778 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13786 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13787 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13795 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13796 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13804 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13805 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13813 gcc_unreachable ();
13820 strcpy (buf, ssep);
13821 if (GET_MODE (operands[0]) == SFmode)
13822 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13824 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13828 strcpy (buf, ssep + 1);
13829 if (GET_MODE (operands[0]) == SFmode)
13830 strcat (buf, "ss\t{%2, %0|%0, %2}");
13832 strcat (buf, "sd\t{%2, %0|%0, %2}");
13838 switch (GET_CODE (operands[3]))
13842 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13844 rtx temp = operands[2];
13845 operands[2] = operands[1];
13846 operands[1] = temp;
13849 /* We now know operands[0] == operands[1]. */
13851 if (MEM_P (operands[2]))
13857 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13859 if (STACK_TOP_P (operands[0]))
13860 /* How is it that we are storing to a dead operand[2]?
13861 Well, presumably operands[1] is dead too. We can't
13862 store the result to st(0) as st(0) gets popped on this
13863 instruction. Instead store to operands[2] (which I
13864 think has to be st(1)). st(1) will be popped later.
13865 gcc <= 2.8.1 didn't have this check and generated
13866 assembly code that the Unixware assembler rejected. */
13867 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13869 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13873 if (STACK_TOP_P (operands[0]))
13874 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13876 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13881 if (MEM_P (operands[1]))
13887 if (MEM_P (operands[2]))
13893 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13896 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13897 derived assemblers, confusingly reverse the direction of
13898 the operation for fsub{r} and fdiv{r} when the
13899 destination register is not st(0). The Intel assembler
13900 doesn't have this brain damage. Read !SYSV386_COMPAT to
13901 figure out what the hardware really does. */
13902 if (STACK_TOP_P (operands[0]))
13903 p = "{p\t%0, %2|rp\t%2, %0}";
13905 p = "{rp\t%2, %0|p\t%0, %2}";
13907 if (STACK_TOP_P (operands[0]))
13908 /* As above for fmul/fadd, we can't store to st(0). */
13909 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13911 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13916 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13919 if (STACK_TOP_P (operands[0]))
13920 p = "{rp\t%0, %1|p\t%1, %0}";
13922 p = "{p\t%1, %0|rp\t%0, %1}";
13924 if (STACK_TOP_P (operands[0]))
13925 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13927 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13932 if (STACK_TOP_P (operands[0]))
13934 if (STACK_TOP_P (operands[1]))
13935 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13937 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13940 else if (STACK_TOP_P (operands[1]))
13943 p = "{\t%1, %0|r\t%0, %1}";
13945 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13951 p = "{r\t%2, %0|\t%0, %2}";
13953 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13959 gcc_unreachable ();
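/* A note on the templates above (editor's gloss): an output template of
   the form "{x|y}" expands to "x" under the AT&T assembler dialect and
   to "y" under the Intel dialect, which is how a single template such as
   "p\t{%2, %0|%0, %2}" yields both operand orders from one string.  */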
13966 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
13969 ix86_mode_needed (int entity, rtx insn)
13971 enum attr_i387_cw mode;
13973 /* The mode UNINITIALIZED is used to store the control word after a
13974 function call or ASM pattern. The mode ANY specifies that the function
13975 has no requirements on the control word and makes no changes in the
13976 bits we are interested in. */
13979 || (NONJUMP_INSN_P (insn)
13980 && (asm_noperands (PATTERN (insn)) >= 0
13981 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13982 return I387_CW_UNINITIALIZED;
13984 if (recog_memoized (insn) < 0)
13985 return I387_CW_ANY;
13987 mode = get_attr_i387_cw (insn);
13992 if (mode == I387_CW_TRUNC)
13997 if (mode == I387_CW_FLOOR)
14002 if (mode == I387_CW_CEIL)
14007 if (mode == I387_CW_MASK_PM)
14012 gcc_unreachable ();
14015 return I387_CW_ANY;
14018 /* Output code to initialize the control word copies used by the trunc?f?i
14019 and rounding patterns. CURRENT_MODE is set to the current control word,
14020 while NEW_MODE is set to the new control word. */
14023 emit_i387_cw_initialization (int mode)
14025 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14028 enum ix86_stack_slot slot;
14030 rtx reg = gen_reg_rtx (HImode);
14032 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14033 emit_move_insn (reg, copy_rtx (stored_mode));
14035 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14036 || optimize_function_for_size_p (cfun))
14040 case I387_CW_TRUNC:
14041 /* round toward zero (truncate) */
14042 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14043 slot = SLOT_CW_TRUNC;
14046 case I387_CW_FLOOR:
14047 /* round down toward -oo */
14048 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14049 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14050 slot = SLOT_CW_FLOOR;
14054 /* round up toward +oo */
14055 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14056 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14057 slot = SLOT_CW_CEIL;
14060 case I387_CW_MASK_PM:
14061 /* mask precision exception for nearbyint() */
14062 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14063 slot = SLOT_CW_MASK_PM;
14067 gcc_unreachable ();
14074 case I387_CW_TRUNC:
14075 /* round toward zero (truncate) */
14076 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14077 slot = SLOT_CW_TRUNC;
14080 case I387_CW_FLOOR:
14081 /* round down toward -oo */
14082 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14083 slot = SLOT_CW_FLOOR;
14087 /* round up toward +oo */
14088 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14089 slot = SLOT_CW_CEIL;
14092 case I387_CW_MASK_PM:
14093 /* mask precision exception for nearbyint() */
14094 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14095 slot = SLOT_CW_MASK_PM;
14099 gcc_unreachable ();
14103 gcc_assert (slot < MAX_386_STACK_LOCALS);
14105 new_mode = assign_386_stack_local (HImode, slot);
14106 emit_move_insn (new_mode, reg);
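/* The magic constants above are the x87 control-word rounding-control
   field (bits 10-11) and the precision-exception mask bit (bit 5).  A
   standalone sketch (editor's illustration) of the same bit games on a
   plain 16-bit control-word value: */

#if 0 /* illustrative only */
static unsigned short
example_cw_trunc (unsigned short cw)    /* RC = 11: round toward zero */
{ return cw | 0x0c00; }

static unsigned short
example_cw_floor (unsigned short cw)    /* RC = 01: round toward -oo */
{ return (unsigned short) ((cw & ~0x0c00) | 0x0400); }

static unsigned short
example_cw_ceil (unsigned short cw)     /* RC = 10: round toward +oo */
{ return (unsigned short) ((cw & ~0x0c00) | 0x0800); }

static unsigned short
example_cw_mask_pm (unsigned short cw)  /* mask the precision exception */
{ return cw | 0x0020; }
#endif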
14109 /* Output code for INSN to convert a float to a signed int. OPERANDS
14110 are the insn operands. The output may be [HSD]Imode and the input
14111 operand may be [SDX]Fmode. */
14114 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14116 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14117 int dimode_p = GET_MODE (operands[0]) == DImode;
14118 int round_mode = get_attr_i387_cw (insn);
14120 /* Jump through a hoop or two for DImode, since the hardware has no
14121 non-popping instruction. We used to do this a different way, but
14122 that was somewhat fragile and broke with post-reload splitters. */
14123 if ((dimode_p || fisttp) && !stack_top_dies)
14124 output_asm_insn ("fld\t%y1", operands);
14126 gcc_assert (STACK_TOP_P (operands[1]));
14127 gcc_assert (MEM_P (operands[0]));
14128 gcc_assert (GET_MODE (operands[1]) != TFmode);
14131 output_asm_insn ("fisttp%Z0\t%0", operands);
14134 if (round_mode != I387_CW_ANY)
14135 output_asm_insn ("fldcw\t%3", operands);
14136 if (stack_top_dies || dimode_p)
14137 output_asm_insn ("fistp%Z0\t%0", operands);
14139 output_asm_insn ("fist%Z0\t%0", operands);
14140 if (round_mode != I387_CW_ANY)
14141 output_asm_insn ("fldcw\t%2", operands);
14147 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14148 have the values zero or one, indicates the ffreep insn's operand
14149 from the OPERANDS array. */
14151 static const char *
14152 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14154 if (TARGET_USE_FFREEP)
14155 #ifdef HAVE_AS_IX86_FFREEP
14156 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14159 static char retval[32];
14160 int regno = REGNO (operands[opno]);
14162 gcc_assert (FP_REGNO_P (regno));
14164 regno -= FIRST_STACK_REG;
14166 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14171 return opno ? "fstp\t%y1" : "fstp\t%y0";
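/* Editor's note on the raw encoding above: ffreep %st(i) is the two-byte
   opcode 0xdf 0xc0+i.  For st(1) the function emits ASM_SHORT "0xc1df",
   which on little-endian x86 lays down the bytes 0xdf 0xc1, exactly the
   ffreep encoding, for assemblers that do not know the mnemonic.  */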
14175 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14176 should be used. UNORDERED_P is true when fucom should be used. */
14179 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14181 int stack_top_dies;
14182 rtx cmp_op0, cmp_op1;
14183 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14187 cmp_op0 = operands[0];
14188 cmp_op1 = operands[1];
14192 cmp_op0 = operands[1];
14193 cmp_op1 = operands[2];
14198 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14199 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14200 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14201 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14203 if (GET_MODE (operands[0]) == SFmode)
14205 return &ucomiss[TARGET_AVX ? 0 : 1];
14207 return &comiss[TARGET_AVX ? 0 : 1];
14210 return &ucomisd[TARGET_AVX ? 0 : 1];
14212 return &comisd[TARGET_AVX ? 0 : 1];
14215 gcc_assert (STACK_TOP_P (cmp_op0));
14217 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14219 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14221 if (stack_top_dies)
14223 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14224 return output_387_ffreep (operands, 1);
14227 return "ftst\n\tfnstsw\t%0";
14230 if (STACK_REG_P (cmp_op1)
14232 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14233 && REGNO (cmp_op1) != FIRST_STACK_REG)
14235 /* If the top of the 387 stack dies, and the other operand
14236 is also a stack register that dies, then this must be a
14237 `fcompp' float compare
14241 /* There is no double-popping fcomi variant. Fortunately,
14242 eflags is immune from the fstp's cc clobbering. */
14244 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14246 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14247 return output_387_ffreep (operands, 0);
14252 return "fucompp\n\tfnstsw\t%0";
14254 return "fcompp\n\tfnstsw\t%0";
14259 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14261 static const char * const alt[16] =
14263 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14264 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14265 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14266 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14268 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14269 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14273 "fcomi\t{%y1, %0|%0, %y1}",
14274 "fcomip\t{%y1, %0|%0, %y1}",
14275 "fucomi\t{%y1, %0|%0, %y1}",
14276 "fucomip\t{%y1, %0|%0, %y1}",
14287 mask = eflags_p << 3;
14288 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14289 mask |= unordered_p << 1;
14290 mask |= stack_top_dies;
14292 gcc_assert (mask < 16);
14301 ix86_output_addr_vec_elt (FILE *file, int value)
14303 const char *directive = ASM_LONG;
14307 directive = ASM_QUAD;
14309 gcc_assert (!TARGET_64BIT);
14312 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14316 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14318 const char *directive = ASM_LONG;
14321 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14322 directive = ASM_QUAD;
14324 gcc_assert (!TARGET_64BIT);
14326 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14327 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14328 fprintf (file, "%s%s%d-%s%d\n",
14329 directive, LPREFIX, value, LPREFIX, rel);
14330 else if (HAVE_AS_GOTOFF_IN_DATA)
14331 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14333 else if (TARGET_MACHO)
14335 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14336 machopic_output_function_base_name (file);
14341 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14342 GOT_SYMBOL_NAME, LPREFIX, value);
14345 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14349 ix86_expand_clear (rtx dest)
14353 /* We play register width games, which are only valid after reload. */
14354 gcc_assert (reload_completed);
14356 /* Avoid HImode and its attendant prefix byte. */
14357 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14358 dest = gen_rtx_REG (SImode, REGNO (dest));
14359 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14361 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14362 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14364 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14365 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
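/* Illustrative result (editor's sketch): on most tunings this emits
       xorl  %eax, %eax
   with an explicit flags clobber in the pattern, since xor sets the
   flags; only when TARGET_USE_MOV0 is set and the insn is not being
   optimized for speed does it fall back to
       movl  $0, %eax
   which leaves the flags alone but is longer.  */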
14371 /* X is an unchanging MEM. If it is a constant pool reference, return
14372 the constant pool rtx, else NULL. */
14375 maybe_get_pool_constant (rtx x)
14377 x = ix86_delegitimize_address (XEXP (x, 0));
14379 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14380 return get_pool_constant (x);
14386 ix86_expand_move (enum machine_mode mode, rtx operands[])
14389 enum tls_model model;
14394 if (GET_CODE (op1) == SYMBOL_REF)
14396 model = SYMBOL_REF_TLS_MODEL (op1);
14399 op1 = legitimize_tls_address (op1, model, true);
14400 op1 = force_operand (op1, op0);
14404 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14405 && SYMBOL_REF_DLLIMPORT_P (op1))
14406 op1 = legitimize_dllimport_symbol (op1, false);
14408 else if (GET_CODE (op1) == CONST
14409 && GET_CODE (XEXP (op1, 0)) == PLUS
14410 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14412 rtx addend = XEXP (XEXP (op1, 0), 1);
14413 rtx symbol = XEXP (XEXP (op1, 0), 0);
14416 model = SYMBOL_REF_TLS_MODEL (symbol);
14418 tmp = legitimize_tls_address (symbol, model, true);
14419 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14420 && SYMBOL_REF_DLLIMPORT_P (symbol))
14421 tmp = legitimize_dllimport_symbol (symbol, true);
14425 tmp = force_operand (tmp, NULL);
14426 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14427 op0, 1, OPTAB_DIRECT);
14433 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14435 if (TARGET_MACHO && !TARGET_64BIT)
14440 rtx temp = ((reload_in_progress
14441 || ((op0 && REG_P (op0))
14443 ? op0 : gen_reg_rtx (Pmode));
14444 op1 = machopic_indirect_data_reference (op1, temp);
14445 op1 = machopic_legitimize_pic_address (op1, mode,
14446 temp == op1 ? 0 : temp);
14448 else if (MACHOPIC_INDIRECT)
14449 op1 = machopic_indirect_data_reference (op1, 0);
14457 op1 = force_reg (Pmode, op1);
14458 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14460 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14461 op1 = legitimize_pic_address (op1, reg);
14470 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14471 || !push_operand (op0, mode))
14473 op1 = force_reg (mode, op1);
14475 if (push_operand (op0, mode)
14476 && ! general_no_elim_operand (op1, mode))
14477 op1 = copy_to_mode_reg (mode, op1);
14479 /* Force large constants in 64-bit compilation into registers
14480 to get them CSEed. */
14481 if (can_create_pseudo_p ()
14482 && (mode == DImode) && TARGET_64BIT
14483 && immediate_operand (op1, mode)
14484 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14485 && !register_operand (op0, mode)
14487 op1 = copy_to_mode_reg (mode, op1);
14489 if (can_create_pseudo_p ()
14490 && FLOAT_MODE_P (mode)
14491 && GET_CODE (op1) == CONST_DOUBLE)
14493 /* If we are loading a floating point constant to a register,
14494 force the value to memory now, since we'll get better code
14495 out the back end. */
14497 op1 = validize_mem (force_const_mem (mode, op1));
14498 if (!register_operand (op0, mode))
14500 rtx temp = gen_reg_rtx (mode);
14501 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14502 emit_move_insn (op0, temp);
14508 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14512 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14514 rtx op0 = operands[0], op1 = operands[1];
14515 unsigned int align = GET_MODE_ALIGNMENT (mode);
14517 /* Force constants other than zero into memory. We do not know how
14518 the instructions used to build constants modify the upper 64 bits
14519 of the register; once we have that information we may be able
14520 to handle some of them more efficiently. */
14521 if (can_create_pseudo_p ()
14522 && register_operand (op0, mode)
14523 && (CONSTANT_P (op1)
14524 || (GET_CODE (op1) == SUBREG
14525 && CONSTANT_P (SUBREG_REG (op1))))
14526 && !standard_sse_constant_p (op1))
14527 op1 = validize_mem (force_const_mem (mode, op1));
14529 /* We need to check memory alignment for SSE modes since attributes
14530 can make operands unaligned. */
14531 if (can_create_pseudo_p ()
14532 && SSE_REG_MODE_P (mode)
14533 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14534 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14538 /* ix86_expand_vector_move_misalign() does not like constants ... */
14539 if (CONSTANT_P (op1)
14540 || (GET_CODE (op1) == SUBREG
14541 && CONSTANT_P (SUBREG_REG (op1))))
14542 op1 = validize_mem (force_const_mem (mode, op1));
14544 /* ... nor both arguments in memory. */
14545 if (!register_operand (op0, mode)
14546 && !register_operand (op1, mode))
14547 op1 = force_reg (mode, op1);
14549 tmp[0] = op0; tmp[1] = op1;
14550 ix86_expand_vector_move_misalign (mode, tmp);
14554 /* Make operand1 a register if it isn't already. */
14555 if (can_create_pseudo_p ()
14556 && !register_operand (op0, mode)
14557 && !register_operand (op1, mode))
14559 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14563 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14566 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14567 straight to ix86_expand_vector_move. */
14568 /* Code generation for scalar reg-reg moves of single and double precision data:
14569 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14573 if (x86_sse_partial_reg_dependency == true)
14578 Code generation for scalar loads of double precision data:
14579 if (x86_sse_split_regs == true)
14580 movlpd mem, reg (gas syntax)
14584 Code generation for unaligned packed loads of single precision data
14585 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14586 if (x86_sse_unaligned_move_optimal)
14589 if (x86_sse_partial_reg_dependency == true)
14601 Code generation for unaligned packed loads of double precision data
14602 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14603 if (x86_sse_unaligned_move_optimal)
14606 if (x86_sse_split_regs == true)
14619 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14628 switch (GET_MODE_CLASS (mode))
14630 case MODE_VECTOR_INT:
14632 switch (GET_MODE_SIZE (mode))
14635 /* If we're optimizing for size, movups is the smallest. */
14636 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14638 op0 = gen_lowpart (V4SFmode, op0);
14639 op1 = gen_lowpart (V4SFmode, op1);
14640 emit_insn (gen_avx_movups (op0, op1));
14643 op0 = gen_lowpart (V16QImode, op0);
14644 op1 = gen_lowpart (V16QImode, op1);
14645 emit_insn (gen_avx_movdqu (op0, op1));
14648 op0 = gen_lowpart (V32QImode, op0);
14649 op1 = gen_lowpart (V32QImode, op1);
14650 emit_insn (gen_avx_movdqu256 (op0, op1));
14653 gcc_unreachable ();
14656 case MODE_VECTOR_FLOAT:
14657 op0 = gen_lowpart (mode, op0);
14658 op1 = gen_lowpart (mode, op1);
14663 emit_insn (gen_avx_movups (op0, op1));
14666 emit_insn (gen_avx_movups256 (op0, op1));
14669 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14671 op0 = gen_lowpart (V4SFmode, op0);
14672 op1 = gen_lowpart (V4SFmode, op1);
14673 emit_insn (gen_avx_movups (op0, op1));
14676 emit_insn (gen_avx_movupd (op0, op1));
14679 emit_insn (gen_avx_movupd256 (op0, op1));
14682 gcc_unreachable ();
14687 gcc_unreachable ();
14695 /* If we're optimizing for size, movups is the smallest. */
14696 if (optimize_insn_for_size_p ()
14697 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14699 op0 = gen_lowpart (V4SFmode, op0);
14700 op1 = gen_lowpart (V4SFmode, op1);
14701 emit_insn (gen_sse_movups (op0, op1));
14705 /* ??? If we have typed data, then it would appear that using
14706 movdqu is the only way to get unaligned data loaded with
14708 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14710 op0 = gen_lowpart (V16QImode, op0);
14711 op1 = gen_lowpart (V16QImode, op1);
14712 emit_insn (gen_sse2_movdqu (op0, op1));
14716 if (TARGET_SSE2 && mode == V2DFmode)
14720 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14722 op0 = gen_lowpart (V2DFmode, op0);
14723 op1 = gen_lowpart (V2DFmode, op1);
14724 emit_insn (gen_sse2_movupd (op0, op1));
14728 /* When SSE registers are split into halves, we can avoid
14729 writing to the top half twice. */
14730 if (TARGET_SSE_SPLIT_REGS)
14732 emit_clobber (op0);
14737 /* ??? Not sure about the best option for the Intel chips.
14738 The following would seem to satisfy; the register is
14739 entirely cleared, breaking the dependency chain. We
14740 then store to the upper half, with a dependency depth
14741 of one. A rumor has it that Intel recommends two movsd
14742 followed by an unpacklpd, but this is unconfirmed. And
14743 given that the dependency depth of the unpacklpd would
14744 still be one, I'm not sure why this would be better. */
14745 zero = CONST0_RTX (V2DFmode);
14748 m = adjust_address (op1, DFmode, 0);
14749 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14750 m = adjust_address (op1, DFmode, 8);
14751 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14755 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14757 op0 = gen_lowpart (V4SFmode, op0);
14758 op1 = gen_lowpart (V4SFmode, op1);
14759 emit_insn (gen_sse_movups (op0, op1));
14763 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14764 emit_move_insn (op0, CONST0_RTX (mode));
14766 emit_clobber (op0);
14768 if (mode != V4SFmode)
14769 op0 = gen_lowpart (V4SFmode, op0);
14770 m = adjust_address (op1, V2SFmode, 0);
14771 emit_insn (gen_sse_loadlps (op0, op0, m));
14772 m = adjust_address (op1, V2SFmode, 8);
14773 emit_insn (gen_sse_loadhps (op0, op0, m));
14776 else if (MEM_P (op0))
14778 /* If we're optimizing for size, movups is the smallest. */
14779 if (optimize_insn_for_size_p ()
14780 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14782 op0 = gen_lowpart (V4SFmode, op0);
14783 op1 = gen_lowpart (V4SFmode, op1);
14784 emit_insn (gen_sse_movups (op0, op1));
14788 /* ??? Similar to above, only less clear because of quote
14789 typeless stores unquote. */
14790 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14791 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14793 op0 = gen_lowpart (V16QImode, op0);
14794 op1 = gen_lowpart (V16QImode, op1);
14795 emit_insn (gen_sse2_movdqu (op0, op1));
14799 if (TARGET_SSE2 && mode == V2DFmode)
14801 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14803 op0 = gen_lowpart (V2DFmode, op0);
14804 op1 = gen_lowpart (V2DFmode, op1);
14805 emit_insn (gen_sse2_movupd (op0, op1));
14809 m = adjust_address (op0, DFmode, 0);
14810 emit_insn (gen_sse2_storelpd (m, op1));
14811 m = adjust_address (op0, DFmode, 8);
14812 emit_insn (gen_sse2_storehpd (m, op1));
14817 if (mode != V4SFmode)
14818 op1 = gen_lowpart (V4SFmode, op1);
14820 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14822 op0 = gen_lowpart (V4SFmode, op0);
14823 emit_insn (gen_sse_movups (op0, op1));
14827 m = adjust_address (op0, V2SFmode, 0);
14828 emit_insn (gen_sse_storelps (m, op1));
14829 m = adjust_address (op0, V2SFmode, 8);
14830 emit_insn (gen_sse_storehps (m, op1));
14835 gcc_unreachable ();
14838 /* Expand a push in MODE. This is some mode for which we do not support
14839 proper push instructions, at least from the registers that we expect
14840 the value to live in. */
14843 ix86_expand_push (enum machine_mode mode, rtx x)
14847 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14848 GEN_INT (-GET_MODE_SIZE (mode)),
14849 stack_pointer_rtx, 1, OPTAB_DIRECT);
14850 if (tmp != stack_pointer_rtx)
14851 emit_move_insn (stack_pointer_rtx, tmp);
14853 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14855 /* When we push an operand onto the stack, it has to be aligned at least
14856 at the function argument boundary. However, since we don't have
14857 the argument type, we can't determine the actual argument
14859 emit_move_insn (tmp, x);
14862 /* Helper function of ix86_fixup_binary_operands to canonicalize
14863 operand order. Returns true if the operands should be swapped. */
14866 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14869 rtx dst = operands[0];
14870 rtx src1 = operands[1];
14871 rtx src2 = operands[2];
14873 /* If the operation is not commutative, we can't do anything. */
14874 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14877 /* Highest priority is that src1 should match dst. */
14878 if (rtx_equal_p (dst, src1))
14880 if (rtx_equal_p (dst, src2))
14883 /* Next highest priority is that immediate constants come second. */
14884 if (immediate_operand (src2, mode))
14886 if (immediate_operand (src1, mode))
14889 /* Lowest priority is that memory references should come second. */
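/* Worked example (editor's): for "a = b + a" the operands arrive as
   dst = a, src1 = b, src2 = a; since PLUS is commutative and src2
   matches dst, the predicate returns true and the caller swaps the
   sources, so the two-address form "dst = dst + b" (a single
   "addl b, a") can be used directly.  */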
14899 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14900 destination to use for the operation. If different from the true
14901 destination in operands[0], a copy operation will be required. */
14904 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14907 rtx dst = operands[0];
14908 rtx src1 = operands[1];
14909 rtx src2 = operands[2];
14911 /* Canonicalize operand order. */
14912 if (ix86_swap_binary_operands_p (code, mode, operands))
14916 /* It is invalid to swap operands of different modes. */
14917 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14924 /* Both source operands cannot be in memory. */
14925 if (MEM_P (src1) && MEM_P (src2))
14927 /* Optimization: Only read from memory once. */
14928 if (rtx_equal_p (src1, src2))
14930 src2 = force_reg (mode, src2);
14934 src2 = force_reg (mode, src2);
14937 /* If the destination is memory, and we do not have matching source
14938 operands, do things in registers. */
14939 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14940 dst = gen_reg_rtx (mode);
14942 /* Source 1 cannot be a constant. */
14943 if (CONSTANT_P (src1))
14944 src1 = force_reg (mode, src1);
14946 /* Source 1 cannot be a non-matching memory. */
14947 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14948 src1 = force_reg (mode, src1);
14950 operands[1] = src1;
14951 operands[2] = src2;
14955 /* Similarly, but assume that the destination has already been
14956 set up properly. */
14959 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14960 enum machine_mode mode, rtx operands[])
14962 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14963 gcc_assert (dst == operands[0]);
14966 /* Attempt to expand a binary operator. Make the expansion closer to the
14967 actual machine than just general_operand, which will allow 3 separate
14968 memory references (one output, two input) in a single insn. */
14971 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14974 rtx src1, src2, dst, op, clob;
14976 dst = ix86_fixup_binary_operands (code, mode, operands);
14977 src1 = operands[1];
14978 src2 = operands[2];
14980 /* Emit the instruction. */
14982 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14983 if (reload_in_progress)
14985 /* Reload doesn't know about the flags register, and doesn't know that
14986 it doesn't want to clobber it. We can only do this with PLUS. */
14987 gcc_assert (code == PLUS);
14992 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14993 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14996 /* Fix up the destination if needed. */
14997 if (dst != operands[0])
14998 emit_move_insn (operands[0], dst);
15001 /* Return TRUE or FALSE depending on whether the binary operator meets the
15002 appropriate constraints. */
15005 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15008 rtx dst = operands[0];
15009 rtx src1 = operands[1];
15010 rtx src2 = operands[2];
15012 /* Both source operands cannot be in memory. */
15013 if (MEM_P (src1) && MEM_P (src2))
15016 /* Canonicalize operand order for commutative operators. */
15017 if (ix86_swap_binary_operands_p (code, mode, operands))
15024 /* If the destination is memory, we must have a matching source operand. */
15025 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15028 /* Source 1 cannot be a constant. */
15029 if (CONSTANT_P (src1))
15032 /* Source 1 cannot be a non-matching memory. */
15033 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15035 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15036 return (code == AND
15039 || (TARGET_64BIT && mode == DImode))
15040 && CONST_INT_P (src2)
15041 && (INTVAL (src2) == 0xff
15042 || INTVAL (src2) == 0xffff));
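/* Editor's gloss on the special case just above: "and" with 0xff or
   0xffff is accepted even with a non-matching memory source because it
   is really a zero-extending move, e.g. "a = mem & 0xff" can be emitted
   as
       movzbl  mem, %eax
   instead of a load followed by a masking and.  */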
15048 /* Attempt to expand a unary operator. Make the expansion closer to the
15049 actual machine than just general_operand, which will allow 2 separate
15050 memory references (one output, one input) in a single insn. */
15053 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15056 int matching_memory;
15057 rtx src, dst, op, clob;
15062 /* If the destination is memory, and we do not have matching source
15063 operands, do things in registers. */
15064 matching_memory = 0;
15067 if (rtx_equal_p (dst, src))
15068 matching_memory = 1;
15070 dst = gen_reg_rtx (mode);
15073 /* When source operand is memory, destination must match. */
15074 if (MEM_P (src) && !matching_memory)
15075 src = force_reg (mode, src);
15077 /* Emit the instruction. */
15079 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15080 if (reload_in_progress || code == NOT)
15082 /* Reload doesn't know about the flags register, and doesn't know that
15083 it doesn't want to clobber it. */
15084 gcc_assert (code == NOT);
15089 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15090 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15093 /* Fix up the destination if needed. */
15094 if (dst != operands[0])
15095 emit_move_insn (operands[0], dst);
15098 /* Split a 32-bit/64-bit divmod with an 8-bit unsigned divmod if dividend
15099 and divisor are within the range [0-255]. */
15102 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15105 rtx end_label, qimode_label;
15106 rtx insn, div, mod;
15107 rtx scratch, tmp0, tmp1, tmp2;
15108 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15109 rtx (*gen_zero_extend) (rtx, rtx);
15110 rtx (*gen_test_ccno_1) (rtx, rtx);
15115 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15116 gen_test_ccno_1 = gen_testsi_ccno_1;
15117 gen_zero_extend = gen_zero_extendqisi2;
15120 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15121 gen_test_ccno_1 = gen_testdi_ccno_1;
15122 gen_zero_extend = gen_zero_extendqidi2;
15125 gcc_unreachable ();
15128 end_label = gen_label_rtx ();
15129 qimode_label = gen_label_rtx ();
15131 scratch = gen_reg_rtx (mode);
15133 /* Use 8bit unsigned divmod if dividend and divisor are within
15134 the range [0-255]. */
15135 emit_move_insn (scratch, operands[2]);
15136 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15137 scratch, 1, OPTAB_DIRECT);
15138 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15139 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15140 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15141 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15142 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15144 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15145 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15146 JUMP_LABEL (insn) = qimode_label;
15148 /* Generate the original signed/unsigned divmod. */
15149 div = gen_divmod4_1 (operands[0], operands[1],
15150 operands[2], operands[3]);
15153 /* Branch to the end. */
15154 emit_jump_insn (gen_jump (end_label));
15157 /* Generate 8bit unsigned divide. */
15158 emit_label (qimode_label);
15159 /* Don't use operands[0] for result of 8bit divide since not all
15160 registers support QImode ZERO_EXTRACT. */
15161 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15162 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15163 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15164 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15168 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15169 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15173 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15174 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15177 /* Extract remainder from AH. */
15178 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15179 if (REG_P (operands[1]))
15180 insn = emit_move_insn (operands[1], tmp1);
15183 /* Need a new scratch register since the old one has result
15185 scratch = gen_reg_rtx (mode);
15186 emit_move_insn (scratch, tmp1);
15187 insn = emit_move_insn (operands[1], scratch);
15189 set_unique_reg_note (insn, REG_EQUAL, mod);
15191 /* Zero extend quotient from AL. */
15192 tmp1 = gen_lowpart (QImode, tmp0);
15193 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15194 set_unique_reg_note (insn, REG_EQUAL, div);
15196 emit_label (end_label);
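/* Schematically, the split above emits something like the following
   for a 32-bit unsigned division (registers and labels illustrative):

       movl    dividend, %scratch
       orl     divisor, %scratch
       testl   $-0x100, %scratch    -- both operands in [0-255]?
       je      .Lqimode
       divl    ...                  -- full-width divide
       jmp     .Lend
   .Lqimode:
       divb    ...                  -- 8-bit divide: AL = quotient,
   .Lend:                           -- AH = remainder

   A sketch only; the actual insns come from the divmod4_1 and
   udivmodhiqi3 patterns used above.  */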
15199 #define LEA_SEARCH_THRESHOLD 12
15201 /* Search backward for a non-agu definition of register number REGNO1
15202 or register number REGNO2 in INSN's basic block until we either
15203 1. pass LEA_SEARCH_THRESHOLD instructions, or
15204 2. reach the BB boundary, or
15205 3. reach an agu definition.
15206 Returns the distance between the non-agu definition point and INSN.
15207 If there is no definition point, returns -1. */
15210 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15213 basic_block bb = BLOCK_FOR_INSN (insn);
15216 enum attr_type insn_type;
15218 if (insn != BB_HEAD (bb))
15220 rtx prev = PREV_INSN (insn);
15221 while (prev && distance < LEA_SEARCH_THRESHOLD)
15223 if (NONDEBUG_INSN_P (prev))
15226 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15227 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15228 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15229 && (regno1 == DF_REF_REGNO (*def_rec)
15230 || regno2 == DF_REF_REGNO (*def_rec)))
15232 insn_type = get_attr_type (prev);
15233 if (insn_type != TYPE_LEA)
15237 if (prev == BB_HEAD (bb))
15239 prev = PREV_INSN (prev);
15243 if (distance < LEA_SEARCH_THRESHOLD)
15247 bool simple_loop = false;
15249 FOR_EACH_EDGE (e, ei, bb->preds)
15252 simple_loop = true;
15258 rtx prev = BB_END (bb);
15261 && distance < LEA_SEARCH_THRESHOLD)
15263 if (NONDEBUG_INSN_P (prev))
15266 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15267 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15268 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15269 && (regno1 == DF_REF_REGNO (*def_rec)
15270 || regno2 == DF_REF_REGNO (*def_rec)))
15272 insn_type = get_attr_type (prev);
15273 if (insn_type != TYPE_LEA)
15277 prev = PREV_INSN (prev);
15285 /* get_attr_type may modify recog data. We want to make sure
15286 that recog data is valid for instruction INSN, on which
15287 distance_non_agu_define is called. INSN is unchanged here. */
15288 extract_insn_cached (insn);
15292 /* Return the distance between INSN and the next insn that uses
15293 register number REGNO0 in a memory address. Return -1 if no such
15294 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
15297 distance_agu_use (unsigned int regno0, rtx insn)
15299 basic_block bb = BLOCK_FOR_INSN (insn);
15304 if (insn != BB_END (bb))
15306 rtx next = NEXT_INSN (insn);
15307 while (next && distance < LEA_SEARCH_THRESHOLD)
15309 if (NONDEBUG_INSN_P (next))
15313 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15314 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15315 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15316 && regno0 == DF_REF_REGNO (*use_rec))
15318 /* Return DISTANCE if OP0 is used in memory
15319 address in NEXT. */
15323 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15324 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15325 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15326 && regno0 == DF_REF_REGNO (*def_rec))
15328 /* Return -1 if OP0 is set in NEXT. */
15332 if (next == BB_END (bb))
15334 next = NEXT_INSN (next);
15338 if (distance < LEA_SEARCH_THRESHOLD)
15342 bool simple_loop = false;
15344 FOR_EACH_EDGE (e, ei, bb->succs)
15347 simple_loop = true;
15353 rtx next = BB_HEAD (bb);
15356 && distance < LEA_SEARCH_THRESHOLD)
15358 if (NONDEBUG_INSN_P (next))
15362 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15363 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15364 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15365 && regno0 == DF_REF_REGNO (*use_rec))
15367 /* Return DISTANCE if OP0 is used in memory
15368 address in NEXT. */
15372 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15373 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15374 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15375 && regno0 == DF_REF_REGNO (*def_rec))
15377 /* Return -1 if OP0 is set in NEXT. */
15382 next = NEXT_INSN (next);
15390 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15391 there is a choice between LEA and ADD.
15392 Negative value: ADD is preferred over LEA
15393 Zero: neutral
15394 Positive value: LEA is preferred over ADD */
15395 #define IX86_LEA_PRIORITY 2
15397 /* Return true if it is ok to optimize an ADD operation to LEA
15398 operation to avoid flag register consumption. For most processors,
15399 ADD is faster than LEA. For processors like Atom, if the
15400 destination register of LEA holds an actual address which will be
15401 used soon, LEA is better; otherwise ADD is better. */
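/* For instance (illustrative), on Atom a sequence such as

       add   %ebx, %eax
       mov   (%eax), %ecx

   forwards the ALU result of the add to the address-generation unit,
   which can stall; rewriting the add as

       lea   (%eax,%ebx), %eax

   keeps the address computation on the AGU.  The heuristic below
   approximates "the result is used soon as an address" with DF-based
   instruction distances.  */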
15404 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15406 unsigned int regno0 = true_regnum (operands[0]);
15407 unsigned int regno1 = true_regnum (operands[1]);
15408 unsigned int regno2 = true_regnum (operands[2]);
15410 /* If a = b + c with a != b and a != c, we must use the lea form. */
15411 if (regno0 != regno1 && regno0 != regno2)
15414 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15418 int dist_define, dist_use;
15420 /* Return false if REGNO0 isn't used in a memory address. */
15421 dist_use = distance_agu_use (regno0, insn);
15425 dist_define = distance_non_agu_define (regno1, regno2, insn);
15426 if (dist_define <= 0)
15429 /* If this insn has both a backward non-agu dependence and a forward
15430 agu dependence, the one with the shorter distance takes effect. */
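/* Worked example (numbers made up): with IX86_LEA_PRIORITY == 2,
   a non-agu definition 2 insns back (dist_define == 2) and an
   address use 6 insns ahead (dist_use == 6) gives 2 + 2 < 6, so the
   backward dependence wins and ADD is kept; with dist_use == 3 the
   forward agu dependence wins and LEA is used instead.  */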
15431 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15438 /* Return true if the destination reg of SET_BODY is the shift count of
15439 USE_BODY. */
15442 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15448 /* Retrieve destination of SET_BODY. */
15449 switch (GET_CODE (set_body))
15452 set_dest = SET_DEST (set_body);
15453 if (!set_dest || !REG_P (set_dest))
15457 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15458 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15466 /* Retrieve shift count of USE_BODY. */
15467 switch (GET_CODE (use_body))
15470 shift_rtx = XEXP (use_body, 1);
15473 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15474 if (ix86_dep_by_shift_count_body (set_body,
15475 XVECEXP (use_body, 0, i)))
15483 && (GET_CODE (shift_rtx) == ASHIFT
15484 || GET_CODE (shift_rtx) == LSHIFTRT
15485 || GET_CODE (shift_rtx) == ASHIFTRT
15486 || GET_CODE (shift_rtx) == ROTATE
15487 || GET_CODE (shift_rtx) == ROTATERT))
15489 rtx shift_count = XEXP (shift_rtx, 1);
15491 /* Return true if shift count is dest of SET_BODY. */
15492 if (REG_P (shift_count)
15493 && true_regnum (set_dest) == true_regnum (shift_count))
15500 /* Return true if the destination reg of SET_INSN is the shift count of
15501 USE_INSN. */
15504 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15506 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15507 PATTERN (use_insn));
15510 /* Return TRUE or FALSE depending on whether the unary operator meets the
15511 appropriate constraints. */
15514 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15515 enum machine_mode mode ATTRIBUTE_UNUSED,
15516 rtx operands[2] ATTRIBUTE_UNUSED)
15518 /* If one of the operands is memory, source and destination must match. */
15519 if ((MEM_P (operands[0])
15520 || MEM_P (operands[1]))
15521 && ! rtx_equal_p (operands[0], operands[1]))
15526 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15527 are ok, keeping in mind the possible movddup alternative. */
15530 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15532 if (MEM_P (operands[0]))
15533 return rtx_equal_p (operands[0], operands[1 + high]);
15534 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15535 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
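/* E.g. vec_interleave_lowv2df with both inputs the same memory
   location duplicates that double into both lanes, which SSE3 can do
   with a single "movddup mem, %xmm0" instead of a load plus
   unpcklpd -- hence the TARGET_SSE3 case above (illustrative).  */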
15539 /* Post-reload splitter for converting an SF or DFmode value in an
15540 SSE register into an unsigned SImode value. */
15543 ix86_split_convert_uns_si_sse (rtx operands[])
15545 enum machine_mode vecmode;
15546 rtx value, large, zero_or_two31, input, two31, x;
15548 large = operands[1];
15549 zero_or_two31 = operands[2];
15550 input = operands[3];
15551 two31 = operands[4];
15552 vecmode = GET_MODE (large);
15553 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15555 /* Load up the value into the low element. We must ensure that the other
15556 elements are valid floats -- zero is the easiest such value. */
15559 if (vecmode == V4SFmode)
15560 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15562 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15566 input = gen_rtx_REG (vecmode, REGNO (input));
15567 emit_move_insn (value, CONST0_RTX (vecmode));
15568 if (vecmode == V4SFmode)
15569 emit_insn (gen_sse_movss (value, value, input));
15571 emit_insn (gen_sse2_movsd (value, value, input));
15574 emit_move_insn (large, two31);
15575 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15577 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15578 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15580 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15581 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15583 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15584 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15586 large = gen_rtx_REG (V4SImode, REGNO (large));
15587 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15589 x = gen_rtx_REG (V4SImode, REGNO (value));
15590 if (vecmode == V4SFmode)
15591 emit_insn (gen_sse2_cvttps2dq (x, value));
15593 emit_insn (gen_sse2_cvttpd2dq (x, value));
15596 emit_insn (gen_xorv4si3 (value, value, large));
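/* Net effect, elementwise (a sketch of the intent):
     result = (x < 2^31) ? (int) x
                         : ((int) (x - 2^31)) ^ 0x80000000;
   e.g. x = 3e9 -> 3e9 - 2^31 = 852516352, and the final xor restores
   the unsigned bit pattern 3000000000.  */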
15599 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15600 Expects the 64-bit DImode to be supplied in a pair of integral
15601 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15602 -mfpmath=sse, !optimize_size only. */
15605 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15607 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15608 rtx int_xmm, fp_xmm;
15609 rtx biases, exponents;
15612 int_xmm = gen_reg_rtx (V4SImode);
15613 if (TARGET_INTER_UNIT_MOVES)
15614 emit_insn (gen_movdi_to_sse (int_xmm, input));
15615 else if (TARGET_SSE_SPLIT_REGS)
15617 emit_clobber (int_xmm);
15618 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15622 x = gen_reg_rtx (V2DImode);
15623 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15624 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15627 x = gen_rtx_CONST_VECTOR (V4SImode,
15628 gen_rtvec (4, GEN_INT (0x43300000UL),
15629 GEN_INT (0x45300000UL),
15630 const0_rtx, const0_rtx));
15631 exponents = validize_mem (force_const_mem (V4SImode, x));
15633 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15634 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15636 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15637 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15638 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15639 (0x1.0p84 + double(fp_value_hi_xmm)).
15640 Note these exponents differ by 32. */
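/* Worked example (illustrative): for the input DImode value
   0x0000000500000001, the interleave pairs the low word with
   0x43300000, giving the double 0x1.0p52 + 1, and the high word with
   0x45300000, giving 0x1.0p84 + 5*2^32.  Subtracting the biases
   below leaves 1.0 and 5.0*2^32, whose sum 21474836481.0 is the
   desired result.  */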
15642 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15644 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15645 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15646 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15647 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15648 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15649 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15650 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15651 biases = validize_mem (force_const_mem (V2DFmode, biases));
15652 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15654 /* Add the upper and lower DFmode values together. */
15656 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15659 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15660 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15661 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15664 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15667 /* Not used, but eases macroization of patterns. */
15669 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15670 rtx input ATTRIBUTE_UNUSED)
15672 gcc_unreachable ();
15675 /* Convert an unsigned SImode value into a DFmode. Only currently used
15676 for SSE, but applicable anywhere. */
15679 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15681 REAL_VALUE_TYPE TWO31r;
15684 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15685 NULL, 1, OPTAB_DIRECT);
15687 fp = gen_reg_rtx (DFmode);
15688 emit_insn (gen_floatsidf2 (fp, x));
15690 real_ldexp (&TWO31r, &dconst1, 31);
15691 x = const_double_from_real_value (TWO31r, DFmode);
15693 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15695 emit_move_insn (target, x);
15698 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15699 32-bit mode; otherwise we have a direct convert instruction. */
15702 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15704 REAL_VALUE_TYPE TWO32r;
15705 rtx fp_lo, fp_hi, x;
15707 fp_lo = gen_reg_rtx (DFmode);
15708 fp_hi = gen_reg_rtx (DFmode);
15710 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15712 real_ldexp (&TWO32r, &dconst1, 32);
15713 x = const_double_from_real_value (TWO32r, DFmode);
15714 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15716 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15718 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15721 emit_move_insn (target, x);
15724 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15725 For x86_32, -mfpmath=sse, !optimize_size only. */
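/* Sketch of the idea: split the input into exact 16-bit halves and
   recombine, roughly
     result = (SFmode) (x >> 16) * 0x1.0p16 + (SFmode) (x & 0xffff);
   each half fits in SFmode's 24-bit significand, so only the final
   addition rounds.  */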
15727 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15729 REAL_VALUE_TYPE ONE16r;
15730 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15732 real_ldexp (&ONE16r, &dconst1, 16);
15733 x = const_double_from_real_value (ONE16r, SFmode);
15734 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15735 NULL, 0, OPTAB_DIRECT);
15736 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15737 NULL, 0, OPTAB_DIRECT);
15738 fp_hi = gen_reg_rtx (SFmode);
15739 fp_lo = gen_reg_rtx (SFmode);
15740 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15741 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15742 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15744 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15746 if (!rtx_equal_p (target, fp_hi))
15747 emit_move_insn (target, fp_hi);
15750 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15751 then replicate the value for all elements of the vector
15752 register. */
15755 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15762 v = gen_rtvec (4, value, value, value, value);
15763 return gen_rtx_CONST_VECTOR (V4SImode, v);
15767 v = gen_rtvec (2, value, value);
15768 return gen_rtx_CONST_VECTOR (V2DImode, v);
15772 v = gen_rtvec (8, value, value, value, value,
15773 value, value, value, value);
15775 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
15776 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
15777 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
15778 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15779 return gen_rtx_CONST_VECTOR (V8SFmode, v);
15783 v = gen_rtvec (4, value, value, value, value);
15785 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15786 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15787 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15791 v = gen_rtvec (4, value, value, value, value);
15793 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
15794 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
15795 return gen_rtx_CONST_VECTOR (V4DFmode, v);
15799 v = gen_rtvec (2, value, value);
15801 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15802 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15805 gcc_unreachable ();
15809 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15810 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15811 for an SSE register. If VECT is true, then replicate the mask for
15812 all elements of the vector register. If INVERT is true, then create
15813 a mask excluding the sign bit. */
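/* E.g. for DFmode with VECT false this produces (sketch)
     { 0x8000000000000000, 0 }   -- just the sign bit, or
     { 0x7fffffffffffffff, 0 }   -- INVERT true, everything but it;
   with VECT true the mask element is replicated into every lane.  */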
15816 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15818 enum machine_mode vec_mode, imode;
15819 HOST_WIDE_INT hi, lo;
15824 /* Find the sign bit, sign extended to 2*HWI. */
15831 mode = GET_MODE_INNER (mode);
15833 lo = 0x80000000, hi = lo < 0;
15840 mode = GET_MODE_INNER (mode);
15842 if (HOST_BITS_PER_WIDE_INT >= 64)
15843 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15845 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15850 vec_mode = VOIDmode;
15851 if (HOST_BITS_PER_WIDE_INT >= 64)
15854 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15861 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15865 lo = ~lo, hi = ~hi;
15871 mask = immed_double_const (lo, hi, imode);
15873 vec = gen_rtvec (2, v, mask);
15874 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15875 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15882 gcc_unreachable ();
15886 lo = ~lo, hi = ~hi;
15888 /* Force this value into the low part of a fp vector constant. */
15889 mask = immed_double_const (lo, hi, imode);
15890 mask = gen_lowpart (mode, mask);
15892 if (vec_mode == VOIDmode)
15893 return force_reg (mode, mask);
15895 v = ix86_build_const_vector (vec_mode, vect, mask);
15896 return force_reg (vec_mode, v);
15899 /* Generate code for floating point ABS or NEG. */
15902 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15905 rtx mask, set, use, clob, dst, src;
15906 bool use_sse = false;
15907 bool vector_mode = VECTOR_MODE_P (mode);
15908 enum machine_mode vmode = mode;
15912 else if (mode == TFmode)
15914 else if (TARGET_SSE_MATH)
15916 use_sse = SSE_FLOAT_MODE_P (mode);
15917 if (mode == SFmode)
15919 else if (mode == DFmode)
15923 /* NEG and ABS performed with SSE use bitwise mask operations.
15924 Create the appropriate mask now. */
15926 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
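/* E.g. for scalar DFmode this typically assembles to (illustrative;
   the constant-pool labels are hypothetical names):
     NEG:  xorpd .LCsign(%rip), %xmm0     -- flip the sign bit
     ABS:  andpd .LCnotsign(%rip), %xmm0  -- clear the sign bit
   where the constants are the masks built above.  */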
15935 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15936 set = gen_rtx_SET (VOIDmode, dst, set);
15941 set = gen_rtx_fmt_e (code, mode, src);
15942 set = gen_rtx_SET (VOIDmode, dst, set);
15945 use = gen_rtx_USE (VOIDmode, mask);
15946 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15947 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15948 gen_rtvec (3, set, use, clob)));
15955 /* Expand a copysign operation. Special case operand 0 being a constant. */
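/* The underlying bit identity, as a minimal C sketch (names
   hypothetical, not part of this file):
     uint64_t sign = y_bits &  0x8000000000000000ull;
     uint64_t magn = x_bits & ~0x8000000000000000ull;
     copysign_bits = magn | sign;
   The expanders below realize this with mask/and/or operations on
   SSE registers.  */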
15958 ix86_expand_copysign (rtx operands[])
15960 enum machine_mode mode, vmode;
15961 rtx dest, op0, op1, mask, nmask;
15963 dest = operands[0];
15967 mode = GET_MODE (dest);
15969 if (mode == SFmode)
15971 else if (mode == DFmode)
15976 if (GET_CODE (op0) == CONST_DOUBLE)
15978 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15980 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15981 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15983 if (mode == SFmode || mode == DFmode)
15985 if (op0 == CONST0_RTX (mode))
15986 op0 = CONST0_RTX (vmode);
15989 rtx v = ix86_build_const_vector (vmode, false, op0);
15991 op0 = force_reg (vmode, v);
15994 else if (op0 != CONST0_RTX (mode))
15995 op0 = force_reg (mode, op0);
15997 mask = ix86_build_signbit_mask (vmode, 0, 0);
15999 if (mode == SFmode)
16000 copysign_insn = gen_copysignsf3_const;
16001 else if (mode == DFmode)
16002 copysign_insn = gen_copysigndf3_const;
16004 copysign_insn = gen_copysigntf3_const;
16006 emit_insn (copysign_insn (dest, op0, op1, mask));
16010 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16012 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16013 mask = ix86_build_signbit_mask (vmode, 0, 0);
16015 if (mode == SFmode)
16016 copysign_insn = gen_copysignsf3_var;
16017 else if (mode == DFmode)
16018 copysign_insn = gen_copysigndf3_var;
16020 copysign_insn = gen_copysigntf3_var;
16022 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16026 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16027 be a constant, and so has already been expanded into a vector constant. */
16030 ix86_split_copysign_const (rtx operands[])
16032 enum machine_mode mode, vmode;
16033 rtx dest, op0, mask, x;
16035 dest = operands[0];
16037 mask = operands[3];
16039 mode = GET_MODE (dest);
16040 vmode = GET_MODE (mask);
16042 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16043 x = gen_rtx_AND (vmode, dest, mask);
16044 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16046 if (op0 != CONST0_RTX (vmode))
16048 x = gen_rtx_IOR (vmode, dest, op0);
16049 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16053 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16054 so we have to do two masks. */
16057 ix86_split_copysign_var (rtx operands[])
16059 enum machine_mode mode, vmode;
16060 rtx dest, scratch, op0, op1, mask, nmask, x;
16062 dest = operands[0];
16063 scratch = operands[1];
16066 nmask = operands[4];
16067 mask = operands[5];
16069 mode = GET_MODE (dest);
16070 vmode = GET_MODE (mask);
16072 if (rtx_equal_p (op0, op1))
16074 /* Shouldn't happen often (it's useless, obviously), but when it does
16075 we'd generate incorrect code if we continue below. */
16076 emit_move_insn (dest, op0);
16080 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16082 gcc_assert (REGNO (op1) == REGNO (scratch));
16084 x = gen_rtx_AND (vmode, scratch, mask);
16085 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16088 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16089 x = gen_rtx_NOT (vmode, dest);
16090 x = gen_rtx_AND (vmode, x, op0);
16091 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16095 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16097 x = gen_rtx_AND (vmode, scratch, mask);
16099 else /* alternative 2,4 */
16101 gcc_assert (REGNO (mask) == REGNO (scratch));
16102 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16103 x = gen_rtx_AND (vmode, scratch, op1);
16105 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16107 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16109 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16110 x = gen_rtx_AND (vmode, dest, nmask);
16112 else /* alternative 3,4 */
16114 gcc_assert (REGNO (nmask) == REGNO (dest));
16116 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16117 x = gen_rtx_AND (vmode, dest, op0);
16119 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16122 x = gen_rtx_IOR (vmode, dest, scratch);
16123 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16126 /* Return TRUE or FALSE depending on whether the first SET in INSN
16127 has source and destination with matching CC modes, and whether the
16128 CC mode is at least as constrained as REQ_MODE. */
16131 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16134 enum machine_mode set_mode;
16136 set = PATTERN (insn);
16137 if (GET_CODE (set) == PARALLEL)
16138 set = XVECEXP (set, 0, 0);
16139 gcc_assert (GET_CODE (set) == SET);
16140 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16142 set_mode = GET_MODE (SET_DEST (set));
16146 if (req_mode != CCNOmode
16147 && (req_mode != CCmode
16148 || XEXP (SET_SRC (set), 1) != const0_rtx))
16152 if (req_mode == CCGCmode)
16156 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16160 if (req_mode == CCZmode)
16171 gcc_unreachable ();
16174 return GET_MODE (SET_SRC (set)) == set_mode;
16177 /* Generate insn patterns to do an integer compare of OPERANDS. */
16180 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16182 enum machine_mode cmpmode;
16185 cmpmode = SELECT_CC_MODE (code, op0, op1);
16186 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16188 /* This is very simple, but making the interface the same as in the
16189 FP case makes the rest of the code easier. */
16190 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16191 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16193 /* Return the test that should be put into the flags user, i.e.
16194 the bcc, scc, or cmov instruction. */
16195 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16198 /* Figure out whether to use ordered or unordered fp comparisons.
16199 Return the appropriate mode to use. */
16202 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16204 /* ??? In order to make all comparisons reversible, we do all comparisons
16205 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16206 between all forms of trapping and nontrapping comparisons, we can make
16207 inequality comparisons trapping again, since that results in better code
16208 when using FCOM based compares. */
16209 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16213 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16215 enum machine_mode mode = GET_MODE (op0);
16217 if (SCALAR_FLOAT_MODE_P (mode))
16219 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16220 return ix86_fp_compare_mode (code);
16225 /* Only zero flag is needed. */
16226 case EQ: /* ZF=0 */
16227 case NE: /* ZF!=0 */
16229 /* Codes needing carry flag. */
16230 case GEU: /* CF=0 */
16231 case LTU: /* CF=1 */
16232 /* Detect overflow checks. They need just the carry flag. */
16233 if (GET_CODE (op0) == PLUS
16234 && rtx_equal_p (op1, XEXP (op0, 0)))
16238 case GTU: /* CF=0 & ZF=0 */
16239 case LEU: /* CF=1 | ZF=1 */
16240 /* Detect overflow checks. They need just the carry flag. */
16241 if (GET_CODE (op0) == MINUS
16242 && rtx_equal_p (op1, XEXP (op0, 0)))
16246 /* Codes possibly doable only with the sign flag when
16247 comparing against zero. */
16248 case GE: /* SF=OF or SF=0 */
16249 case LT: /* SF<>OF or SF=1 */
16250 if (op1 == const0_rtx)
16253 /* For other cases the carry flag is not required. */
16255 /* Codes doable only with the sign flag when comparing
16256 against zero, but for which we lack a jump instruction,
16257 so we need to use relational tests against overflow,
16258 which thus needs to be zero. */
16259 case GT: /* ZF=0 & SF=OF */
16260 case LE: /* ZF=1 | SF<>OF */
16261 if (op1 == const0_rtx)
16265 /* The strcmp pattern does (use flags), and combine may ask us for the
16266 proper mode. */
16270 gcc_unreachable ();
16274 /* Return the fixed registers used for condition codes. */
16277 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16284 /* If two condition code modes are compatible, return a condition code
16285 mode which is compatible with both. Otherwise, return
16286 VOIDmode. */
16288 static enum machine_mode
16289 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16294 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16297 if ((m1 == CCGCmode && m2 == CCGOCmode)
16298 || (m1 == CCGOCmode && m2 == CCGCmode))
16304 gcc_unreachable ();
16334 /* These are only compatible with themselves, which we already
16335 know. */
16341 /* Return a comparison we can do that is equivalent to
16342 swap_condition (code), except possibly for orderedness.
16343 But never change orderedness if TARGET_IEEE_FP, returning
16344 UNKNOWN in that case if necessary. */
16346 static enum rtx_code
16347 ix86_fp_swap_condition (enum rtx_code code)
16351 case GT: /* GTU - CF=0 & ZF=0 */
16352 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16353 case GE: /* GEU - CF=0 */
16354 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16355 case UNLT: /* LTU - CF=1 */
16356 return TARGET_IEEE_FP ? UNKNOWN : GT;
16357 case UNLE: /* LEU - CF=1 | ZF=1 */
16358 return TARGET_IEEE_FP ? UNKNOWN : GE;
16360 return swap_condition (code);
16364 /* Return the cost of comparison CODE using the best strategy for performance.
16365 All following functions use the number of instructions as a cost metric.
16366 In the future this should be tweaked to compute bytes for optimize_size and
16367 take into account the performance of various instructions on various CPUs. */
16370 ix86_fp_comparison_cost (enum rtx_code code)
16374 /* The cost of code using bit-twiddling on %ah. */
16391 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16395 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16398 gcc_unreachable ();
16401 switch (ix86_fp_comparison_strategy (code))
16403 case IX86_FPCMP_COMI:
16404 return arith_cost > 4 ? 3 : 2;
16405 case IX86_FPCMP_SAHF:
16406 return arith_cost > 4 ? 4 : 3;
16412 /* Return the strategy to use for a floating-point comparison. We assume
16413 that fcomi is always preferable where available, since that is also true when looking at size
16414 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16416 enum ix86_fpcmp_strategy
16417 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16419 /* Do fcomi/sahf based test when profitable. */
16422 return IX86_FPCMP_COMI;
16424 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16425 return IX86_FPCMP_SAHF;
16427 return IX86_FPCMP_ARITH;
16430 /* Swap, force into registers, or otherwise massage the two operands
16431 to a fp comparison. The operands are updated in place; the new
16432 comparison code is returned. */
16434 static enum rtx_code
16435 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16437 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16438 rtx op0 = *pop0, op1 = *pop1;
16439 enum machine_mode op_mode = GET_MODE (op0);
16440 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16442 /* All of the unordered compare instructions only work on registers.
16443 The same is true of the fcomi compare instructions. The XFmode
16444 compare instructions require registers except when comparing
16445 against zero or when converting operand 1 from fixed point to
16446 floating point. */
16449 && (fpcmp_mode == CCFPUmode
16450 || (op_mode == XFmode
16451 && ! (standard_80387_constant_p (op0) == 1
16452 || standard_80387_constant_p (op1) == 1)
16453 && GET_CODE (op1) != FLOAT)
16454 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16456 op0 = force_reg (op_mode, op0);
16457 op1 = force_reg (op_mode, op1);
16461 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16462 things around if they appear profitable, otherwise force op0
16463 into a register. */
16465 if (standard_80387_constant_p (op0) == 0
16467 && ! (standard_80387_constant_p (op1) == 0
16470 enum rtx_code new_code = ix86_fp_swap_condition (code);
16471 if (new_code != UNKNOWN)
16474 tmp = op0, op0 = op1, op1 = tmp;
16480 op0 = force_reg (op_mode, op0);
16482 if (CONSTANT_P (op1))
16484 int tmp = standard_80387_constant_p (op1);
16486 op1 = validize_mem (force_const_mem (op_mode, op1));
16490 op1 = force_reg (op_mode, op1);
16493 op1 = force_reg (op_mode, op1);
16497 /* Try to rearrange the comparison to make it cheaper. */
16498 if (ix86_fp_comparison_cost (code)
16499 > ix86_fp_comparison_cost (swap_condition (code))
16500 && (REG_P (op1) || can_create_pseudo_p ()))
16503 tmp = op0, op0 = op1, op1 = tmp;
16504 code = swap_condition (code);
16506 op0 = force_reg (op_mode, op0);
16514 /* Convert comparison codes we use to represent FP comparison to integer
16515 code that will result in a proper branch. Return UNKNOWN if no such code
16516 is available. */
16519 ix86_fp_compare_code_to_integer (enum rtx_code code)
16548 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16551 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16553 enum machine_mode fpcmp_mode, intcmp_mode;
16556 fpcmp_mode = ix86_fp_compare_mode (code);
16557 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16559 /* Do fcomi/sahf based test when profitable. */
16560 switch (ix86_fp_comparison_strategy (code))
16562 case IX86_FPCMP_COMI:
16563 intcmp_mode = fpcmp_mode;
16564 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16565 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16570 case IX86_FPCMP_SAHF:
16571 intcmp_mode = fpcmp_mode;
16572 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16573 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16577 scratch = gen_reg_rtx (HImode);
16578 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16579 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16582 case IX86_FPCMP_ARITH:
16583 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16584 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16585 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16587 scratch = gen_reg_rtx (HImode);
16588 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16590 /* In the unordered case, we have to check C2 for NaN's, which
16591 doesn't happen to work out to anything nice combination-wise.
16592 So do some bit twiddling on the value we've got in AH to come
16593 up with an appropriate set of condition codes. */
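/* For reference (x87 facts): after fnstsw the relevant bits of AH
   are C0 = 0x01, C2 = 0x04 and C3 = 0x40, so 0x45 selects all
   three.  Ordered results set at most one of them, while an
   unordered compare sets C3|C2|C0, which the masks below must
   distinguish.  */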
16595 intcmp_mode = CCNOmode;
16600 if (code == GT || !TARGET_IEEE_FP)
16602 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16607 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16608 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16609 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16610 intcmp_mode = CCmode;
16616 if (code == LT && TARGET_IEEE_FP)
16618 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16619 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16620 intcmp_mode = CCmode;
16625 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16631 if (code == GE || !TARGET_IEEE_FP)
16633 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16638 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16639 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16645 if (code == LE && TARGET_IEEE_FP)
16647 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16648 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16649 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16650 intcmp_mode = CCmode;
16655 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16661 if (code == EQ && TARGET_IEEE_FP)
16663 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16664 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16665 intcmp_mode = CCmode;
16670 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16676 if (code == NE && TARGET_IEEE_FP)
16678 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16679 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16685 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16691 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16695 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16700 gcc_unreachable ();
16708 /* Return the test that should be put into the flags user, i.e.
16709 the bcc, scc, or cmov instruction. */
16710 return gen_rtx_fmt_ee (code, VOIDmode,
16711 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16716 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16720 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16721 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16723 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16725 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16726 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16729 ret = ix86_expand_int_compare (code, op0, op1);
16735 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16737 enum machine_mode mode = GET_MODE (op0);
16749 tmp = ix86_expand_compare (code, op0, op1);
16750 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16751 gen_rtx_LABEL_REF (VOIDmode, label),
16753 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16760 /* Expand DImode branch into multiple compare+branch. */
16762 rtx lo[2], hi[2], label2;
16763 enum rtx_code code1, code2, code3;
16764 enum machine_mode submode;
16766 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16768 tmp = op0, op0 = op1, op1 = tmp;
16769 code = swap_condition (code);
16772 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16773 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16775 submode = mode == DImode ? SImode : DImode;
16777 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16778 avoid two branches. This costs one extra insn, so disable when
16779 optimizing for size. */
16781 if ((code == EQ || code == NE)
16782 && (!optimize_insn_for_size_p ()
16783 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16788 if (hi[1] != const0_rtx)
16789 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16790 NULL_RTX, 0, OPTAB_WIDEN);
16793 if (lo[1] != const0_rtx)
16794 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16795 NULL_RTX, 0, OPTAB_WIDEN);
16797 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16798 NULL_RTX, 0, OPTAB_WIDEN);
16800 ix86_expand_branch (code, tmp, const0_rtx, label);
16804 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
16805 op1 is a constant, and the low word is zero, then we can just
16806 examine the high word. Similarly for a low word of -1 with
16807 less-or-equal or greater-than. */
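/* Example (illustrative): on a 32-bit target the DImode test
   x <u 0x500000000 has lo(op1) == 0, so it reduces to the single
   SImode branch hi(x) <u 5.  */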
16809 if (CONST_INT_P (hi[1]))
16812 case LT: case LTU: case GE: case GEU:
16813 if (lo[1] == const0_rtx)
16815 ix86_expand_branch (code, hi[0], hi[1], label);
16819 case LE: case LEU: case GT: case GTU:
16820 if (lo[1] == constm1_rtx)
16822 ix86_expand_branch (code, hi[0], hi[1], label);
16830 /* Otherwise, we need two or three jumps. */
16832 label2 = gen_label_rtx ();
16835 code2 = swap_condition (code);
16836 code3 = unsigned_condition (code);
16840 case LT: case GT: case LTU: case GTU:
16843 case LE: code1 = LT; code2 = GT; break;
16844 case GE: code1 = GT; code2 = LT; break;
16845 case LEU: code1 = LTU; code2 = GTU; break;
16846 case GEU: code1 = GTU; code2 = LTU; break;
16848 case EQ: code1 = UNKNOWN; code2 = NE; break;
16849 case NE: code2 = UNKNOWN; break;
16852 gcc_unreachable ();
16857 * if (hi(a) < hi(b)) goto true;
16858 * if (hi(a) > hi(b)) goto false;
16859 * if (lo(a) < lo(b)) goto true;
16863 if (code1 != UNKNOWN)
16864 ix86_expand_branch (code1, hi[0], hi[1], label);
16865 if (code2 != UNKNOWN)
16866 ix86_expand_branch (code2, hi[0], hi[1], label2);
16868 ix86_expand_branch (code3, lo[0], lo[1], label);
16870 if (code2 != UNKNOWN)
16871 emit_label (label2);
16876 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16881 /* Split branch based on floating point condition. */
16883 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16884 rtx target1, rtx target2, rtx tmp, rtx pushed)
16889 if (target2 != pc_rtx)
16892 code = reverse_condition_maybe_unordered (code);
16897 condition = ix86_expand_fp_compare (code, op1, op2,
16900 /* Remove pushed operand from stack. */
16902 ix86_free_from_memory (GET_MODE (pushed));
16904 i = emit_jump_insn (gen_rtx_SET
16906 gen_rtx_IF_THEN_ELSE (VOIDmode,
16907 condition, target1, target2)));
16908 if (split_branch_probability >= 0)
16909 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16913 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16917 gcc_assert (GET_MODE (dest) == QImode);
16919 ret = ix86_expand_compare (code, op0, op1);
16920 PUT_MODE (ret, QImode);
16921 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16924 /* Expand a comparison setting or clearing the carry flag. Return true
16925 when successful and set POP for the operation. */
16927 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16929 enum machine_mode mode =
16930 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16932 /* Do not handle double-mode compares that go through a special path. */
16933 if (mode == (TARGET_64BIT ? TImode : DImode))
16936 if (SCALAR_FLOAT_MODE_P (mode))
16938 rtx compare_op, compare_seq;
16940 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16942 /* Shortcut: the following common codes never translate
16943 into carry flag compares. */
16944 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16945 || code == ORDERED || code == UNORDERED)
16948 /* These comparisons require zero flag; swap operands so they won't. */
16949 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16950 && !TARGET_IEEE_FP)
16955 code = swap_condition (code);
16958 /* Try to expand the comparison and verify that we end up with
16959 a carry flag based comparison. This fails to be true only when
16960 we decide to expand the comparison using arithmetic, which is
16961 not a very common scenario. */
16963 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16964 compare_seq = get_insns ();
16967 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16968 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16969 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16971 code = GET_CODE (compare_op);
16973 if (code != LTU && code != GEU)
16976 emit_insn (compare_seq);
16981 if (!INTEGRAL_MODE_P (mode))
16990 /* Convert a==0 into (unsigned)a<1. */
16993 if (op1 != const0_rtx)
16996 code = (code == EQ ? LTU : GEU);
16999 /* Convert a>b into b<a or a>=b-1. */
17002 if (CONST_INT_P (op1))
17004 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17005 /* Bail out on overflow. We can still swap operands, but that
17006 would force loading of the constant into a register. */
17007 if (op1 == const0_rtx
17008 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17010 code = (code == GTU ? GEU : LTU);
17017 code = (code == GTU ? LTU : GEU);
17021 /* Convert a>=0 into (unsigned)a<0x80000000. */
17024 if (mode == DImode || op1 != const0_rtx)
17026 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17027 code = (code == LT ? GEU : LTU);
17031 if (mode == DImode || op1 != constm1_rtx)
17033 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17034 code = (code == LE ? GEU : LTU);
17040 /* Swapping operands may cause a constant to appear as the first operand. */
17041 if (!nonimmediate_operand (op0, VOIDmode))
17043 if (!can_create_pseudo_p ())
17045 op0 = force_reg (mode, op0);
17047 *pop = ix86_expand_compare (code, op0, op1);
17048 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
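/* Typical payoff (illustrative): once reduced to an LTU/GEU carry
   compare, "x < y ? -1 : 0" (unsigned) can be emitted branch-free as
       cmpl  %esi, %edi
       sbbl  %eax, %eax
   since sbb of a register with itself broadcasts the carry flag.  */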
17053 ix86_expand_int_movcc (rtx operands[])
17055 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17056 rtx compare_seq, compare_op;
17057 enum machine_mode mode = GET_MODE (operands[0]);
17058 bool sign_bit_compare_p = false;
17059 rtx op0 = XEXP (operands[1], 0);
17060 rtx op1 = XEXP (operands[1], 1);
17063 compare_op = ix86_expand_compare (code, op0, op1);
17064 compare_seq = get_insns ();
17067 compare_code = GET_CODE (compare_op);
17069 if ((op1 == const0_rtx && (code == GE || code == LT))
17070 || (op1 == constm1_rtx && (code == GT || code == LE)))
17071 sign_bit_compare_p = true;
17073 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17074 HImode insns, we'd be swallowed in word prefix ops. */
17076 if ((mode != HImode || TARGET_FAST_PREFIX)
17077 && (mode != (TARGET_64BIT ? TImode : DImode))
17078 && CONST_INT_P (operands[2])
17079 && CONST_INT_P (operands[3]))
17081 rtx out = operands[0];
17082 HOST_WIDE_INT ct = INTVAL (operands[2]);
17083 HOST_WIDE_INT cf = INTVAL (operands[3]);
17084 HOST_WIDE_INT diff;
17087 /* Sign bit compares are better done using shifts than by using
17088 sbb. */
17089 if (sign_bit_compare_p
17090 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17092 /* Detect overlap between destination and compare sources. */
17095 if (!sign_bit_compare_p)
17098 bool fpcmp = false;
17100 compare_code = GET_CODE (compare_op);
17102 flags = XEXP (compare_op, 0);
17104 if (GET_MODE (flags) == CCFPmode
17105 || GET_MODE (flags) == CCFPUmode)
17109 = ix86_fp_compare_code_to_integer (compare_code);
17112 /* To simplify the rest of the code, restrict to the GEU case. */
17113 if (compare_code == LTU)
17115 HOST_WIDE_INT tmp = ct;
17118 compare_code = reverse_condition (compare_code);
17119 code = reverse_condition (code);
17124 PUT_CODE (compare_op,
17125 reverse_condition_maybe_unordered
17126 (GET_CODE (compare_op)));
17128 PUT_CODE (compare_op,
17129 reverse_condition (GET_CODE (compare_op)));
17133 if (reg_overlap_mentioned_p (out, op0)
17134 || reg_overlap_mentioned_p (out, op1))
17135 tmp = gen_reg_rtx (mode);
17137 if (mode == DImode)
17138 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17140 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17141 flags, compare_op));
17145 if (code == GT || code == GE)
17146 code = reverse_condition (code);
17149 HOST_WIDE_INT tmp = ct;
17154 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17167 tmp = expand_simple_binop (mode, PLUS,
17169 copy_rtx (tmp), 1, OPTAB_DIRECT);
17180 tmp = expand_simple_binop (mode, IOR,
17182 copy_rtx (tmp), 1, OPTAB_DIRECT);
17184 else if (diff == -1 && ct)
17194 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17196 tmp = expand_simple_binop (mode, PLUS,
17197 copy_rtx (tmp), GEN_INT (cf),
17198 copy_rtx (tmp), 1, OPTAB_DIRECT);
17206 * andl cf - ct, dest
17216 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17219 tmp = expand_simple_binop (mode, AND,
17221 gen_int_mode (cf - ct, mode),
17222 copy_rtx (tmp), 1, OPTAB_DIRECT);
17224 tmp = expand_simple_binop (mode, PLUS,
17225 copy_rtx (tmp), GEN_INT (ct),
17226 copy_rtx (tmp), 1, OPTAB_DIRECT);
17229 if (!rtx_equal_p (tmp, out))
17230 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17237 enum machine_mode cmp_mode = GET_MODE (op0);
17240 tmp = ct, ct = cf, cf = tmp;
17243 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17245 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17247 /* We may be reversing an unordered compare to a normal compare, which
17248 is not valid in general (we may convert a non-trapping condition
17249 to a trapping one); however, on i386 we currently emit all
17250 comparisons unordered. */
17251 compare_code = reverse_condition_maybe_unordered (compare_code);
17252 code = reverse_condition_maybe_unordered (code);
17256 compare_code = reverse_condition (compare_code);
17257 code = reverse_condition (code);
17261 compare_code = UNKNOWN;
17262 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17263 && CONST_INT_P (op1))
17265 if (op1 == const0_rtx
17266 && (code == LT || code == GE))
17267 compare_code = code;
17268 else if (op1 == constm1_rtx)
17272 else if (code == GT)
17277 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17278 if (compare_code != UNKNOWN
17279 && GET_MODE (op0) == GET_MODE (out)
17280 && (cf == -1 || ct == -1))
17282 /* If lea code below could be used, only optimize
17283 if it results in a 2 insn sequence. */
17285 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17286 || diff == 3 || diff == 5 || diff == 9)
17287 || (compare_code == LT && ct == -1)
17288 || (compare_code == GE && cf == -1))
17291 * notl op1 (if necessary)
17299 code = reverse_condition (code);
17302 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17304 out = expand_simple_binop (mode, IOR,
17306 out, 1, OPTAB_DIRECT);
17307 if (out != operands[0])
17308 emit_move_insn (operands[0], out);
17315 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17316 || diff == 3 || diff == 5 || diff == 9)
17317 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17319 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17325 * lea cf(dest*(ct-cf)),dest
17329 * This also catches the degenerate setcc-only case.
17335 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17338 /* On x86_64 the lea instruction operates on Pmode, so we need
17339 to get the arithmetic done in the proper mode to match. */
17341 tmp = copy_rtx (out);
17345 out1 = copy_rtx (out);
17346 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17350 tmp = gen_rtx_PLUS (mode, tmp, out1);
17356 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17359 if (!rtx_equal_p (tmp, out))
17362 out = force_operand (tmp, copy_rtx (out));
17364 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17366 if (!rtx_equal_p (out, operands[0]))
17367 emit_move_insn (operands[0], copy_rtx (out));
17373 * General case: Jumpful:
17374 * xorl dest,dest cmpl op1, op2
17375 * cmpl op1, op2 movl ct, dest
17376 * setcc dest jcc 1f
17377 * decl dest movl cf, dest
17378 * andl (cf-ct),dest 1:
17381 * Size 20. Size 14.
17383 * This is reasonably steep, but branch mispredict costs are
17384 * high on modern cpus, so consider failing only if optimizing
17385 * for space. */
17388 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17389 && BRANCH_COST (optimize_insn_for_speed_p (),
17394 enum machine_mode cmp_mode = GET_MODE (op0);
17399 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17401 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17403 /* We may be reversing an unordered compare to a normal compare,
17404 which is not valid in general (we may convert a non-trapping
17405 condition to a trapping one); however, on i386 we currently
17406 emit all comparisons unordered. */
17407 code = reverse_condition_maybe_unordered (code);
17411 code = reverse_condition (code);
17412 if (compare_code != UNKNOWN)
17413 compare_code = reverse_condition (compare_code);
17417 if (compare_code != UNKNOWN)
17419 /* notl op1 (if needed)
17424 For x < 0 (resp. x <= -1) there will be no notl,
17425 so if possible swap the constants to get rid of the
17426 complement.
17427 True/false will be -1/0 while code below (store flag
17428 followed by decrement) is 0/-1, so the constants need
17429 to be exchanged once more. */
17431 if (compare_code == GE || !cf)
17433 code = reverse_condition (code);
17438 HOST_WIDE_INT tmp = cf;
17443 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17447 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17449 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17451 copy_rtx (out), 1, OPTAB_DIRECT);
17454 out = expand_simple_binop (mode, AND, copy_rtx (out),
17455 gen_int_mode (cf - ct, mode),
17456 copy_rtx (out), 1, OPTAB_DIRECT);
17458 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17459 copy_rtx (out), 1, OPTAB_DIRECT);
17460 if (!rtx_equal_p (out, operands[0]))
17461 emit_move_insn (operands[0], copy_rtx (out));
17467 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17469 /* Try a few more things with specific constants and a variable. */
17472 rtx var, orig_out, out, tmp;
17474 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17477 /* If one of the two operands is an interesting constant, load a
17478 0/-1 constant as above and mask it in with a logical operation. */
17480 if (CONST_INT_P (operands[2]))
17483 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17484 operands[3] = constm1_rtx, op = and_optab;
17485 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17486 operands[3] = const0_rtx, op = ior_optab;
17490 else if (CONST_INT_P (operands[3]))
17493 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17494 operands[2] = constm1_rtx, op = and_optab;
17495 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
17496 operands[2] = const0_rtx, op = ior_optab;
17503 orig_out = operands[0];
17504 tmp = gen_reg_rtx (mode);
17507 /* Recurse to get the constant loaded. */
17508 if (ix86_expand_int_movcc (operands) == 0)
17511 /* Mask in the interesting variable. */
17512 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17514 if (!rtx_equal_p (out, orig_out))
17515 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17521 * For comparison with above,
17531 if (! nonimmediate_operand (operands[2], mode))
17532 operands[2] = force_reg (mode, operands[2]);
17533 if (! nonimmediate_operand (operands[3], mode))
17534 operands[3] = force_reg (mode, operands[3]);
17536 if (! register_operand (operands[2], VOIDmode)
17538 || ! register_operand (operands[3], VOIDmode)))
17539 operands[2] = force_reg (mode, operands[2]);
17542 && ! register_operand (operands[3], VOIDmode))
17543 operands[3] = force_reg (mode, operands[3]);
17545 emit_insn (compare_seq);
17546 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17547 gen_rtx_IF_THEN_ELSE (mode,
17548 compare_op, operands[2],
17553 /* Swap, force into registers, or otherwise massage the two operands
17554 to an sse comparison with a mask result. Thus we differ a bit from
17555 ix86_prepare_fp_compare_args which expects to produce a flags result.
17557 The DEST operand exists to help determine whether to commute commutative
17558 operators. The POP0/POP1 operands are updated in place. The new
17559 comparison code is returned, or UNKNOWN if not implementable. */
17561 static enum rtx_code
17562 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17563 rtx *pop0, rtx *pop1)
17571 /* We have no LTGT as an operator. We could implement it with
17572 NE & ORDERED, but this requires an extra temporary. It's
17573 not clear that it's worth it. */
17580 /* These are supported directly. */
17587 /* For commutative operators, try to canonicalize the destination
17588 operand to be first in the comparison - this helps reload
17589 avoid extra moves. */
17590 if (!dest || !rtx_equal_p (dest, *pop1))
17598 /* These are not supported directly. Swap the comparison operands
17599 to transform into something that is supported. */
17603 code = swap_condition (code);
17607 gcc_unreachable ();
17613 /* Detect conditional moves that exactly match min/max operational
17614 semantics. Note that this is IEEE safe, as long as we don't
17615 interchange the operands.
17617 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17618 and TRUE if the operation is successful and instructions are emitted. */
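/* E.g. "x < y ? x : y" maps directly onto minss/minsd.  Note these
   insns return the second operand when the comparison is unordered,
   which is why the operands must not be interchanged (see above).  */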
17621 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17622 rtx cmp_op1, rtx if_true, rtx if_false)
17624 enum machine_mode mode;
17630 else if (code == UNGE)
17633 if_true = if_false;
17639 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17641 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17646 mode = GET_MODE (dest);
17648 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17649 but MODE may be a vector mode and thus not appropriate. */
17650 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17652 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17655 if_true = force_reg (mode, if_true);
17656 v = gen_rtvec (2, if_true, if_false);
17657 tmp = gen_rtx_UNSPEC (mode, v, u);
17661 code = is_min ? SMIN : SMAX;
17662 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17665 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17669 /* Expand an sse vector comparison. Return the register with the result. */
17672 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17673 rtx op_true, rtx op_false)
17675 enum machine_mode mode = GET_MODE (dest);
17678 cmp_op0 = force_reg (mode, cmp_op0);
17679 if (!nonimmediate_operand (cmp_op1, mode))
17680 cmp_op1 = force_reg (mode, cmp_op1);
17683 || reg_overlap_mentioned_p (dest, op_true)
17684 || reg_overlap_mentioned_p (dest, op_false))
17685 dest = gen_reg_rtx (mode);
17687 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17688 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
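/* For reference (a sketch of typical output, assumed rather than quoted
   from the md file): for V4SFmode the SET emitted above becomes a
   mask-producing compare such as

     cmpltps %xmm1, %xmm0    ; dest[i] = op0[i] < op1[i] ? ~0 : 0

   Each result element is all-ones or all-zeros, which is exactly what the
   AND/ANDN/IOR selection in ix86_expand_sse_movcc consumes.  */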
17693 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17694 operations. This is used for both scalar and vector conditional moves. */
17697 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17699 enum machine_mode mode = GET_MODE (dest);
17702 if (op_false == CONST0_RTX (mode))
17704 op_true = force_reg (mode, op_true);
17705 x = gen_rtx_AND (mode, cmp, op_true);
17706 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17708 else if (op_true == CONST0_RTX (mode))
17710 op_false = force_reg (mode, op_false);
17711 x = gen_rtx_NOT (mode, cmp);
17712 x = gen_rtx_AND (mode, x, op_false);
17713 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17715 else if (TARGET_XOP)
17717 rtx pcmov = gen_rtx_SET (mode, dest,
17718 gen_rtx_IF_THEN_ELSE (mode, cmp,
17725 op_true = force_reg (mode, op_true);
17726 op_false = force_reg (mode, op_false);
17728 t2 = gen_reg_rtx (mode);
17730 t3 = gen_reg_rtx (mode);
17734 x = gen_rtx_AND (mode, op_true, cmp);
17735 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17737 x = gen_rtx_NOT (mode, cmp);
17738 x = gen_rtx_AND (mode, x, op_false);
17739 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17741 x = gen_rtx_IOR (mode, t3, t2);
17742 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
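/* The general case above is the classic mask-select idiom.  In C terms,
   assuming CMP holds all-ones or all-zeros per element:

     dest = (op_true & cmp) | (op_false & ~cmp);

   On SSE this maps to pand/andps, pandn/andnps and por/orps; the two
   special cases at the top avoid the IOR when one arm is zero.  */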
17746 /* Expand a floating-point conditional move. Return true if successful. */
17749 ix86_expand_fp_movcc (rtx operands[])
17751 enum machine_mode mode = GET_MODE (operands[0]);
17752 enum rtx_code code = GET_CODE (operands[1]);
17753 rtx tmp, compare_op;
17754 rtx op0 = XEXP (operands[1], 0);
17755 rtx op1 = XEXP (operands[1], 1);
17757 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17759 enum machine_mode cmode;
17761 /* Since we've no cmove for sse registers, don't force bad register
17762 allocation just to gain access to it. Deny movcc when the
17763 comparison mode doesn't match the move mode. */
17764 cmode = GET_MODE (op0);
17765 if (cmode == VOIDmode)
17766 cmode = GET_MODE (op1);
17770 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17771 if (code == UNKNOWN)
17774 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17775 operands[2], operands[3]))
17778 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17779 operands[2], operands[3]);
17780 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17784 /* The floating point conditional move instructions don't directly
17785 support conditions resulting from a signed integer comparison. */
17787 compare_op = ix86_expand_compare (code, op0, op1);
17788 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17790 tmp = gen_reg_rtx (QImode);
17791 ix86_expand_setcc (tmp, code, op0, op1);
17793 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17796 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17797 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17798 operands[2], operands[3])));
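/* A sketch of the x87 fallback (assumed typical output): fcmov only
   decodes the flag combinations produced by unsigned or unordered tests,
   so for a signed integer condition such as

     double r = i < j ? c : d;

   the condition is first materialized with setcc into a QImode register,
   and the fcmov is then issued on the NE-against-zero test of that
   register.  */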
17803 /* Expand a floating-point vector conditional move; a vcond operation
17804 rather than a movcc operation. */
17807 ix86_expand_fp_vcond (rtx operands[])
17809 enum rtx_code code = GET_CODE (operands[3]);
17812 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17813 &operands[4], &operands[5]);
17814 if (code == UNKNOWN)
17817 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17818 operands[5], operands[1], operands[2]))
17821 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17822 operands[1], operands[2]);
17823 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17827 /* Expand a signed/unsigned integral vector conditional move. */
17830 ix86_expand_int_vcond (rtx operands[])
17832 enum machine_mode mode = GET_MODE (operands[0]);
17833 enum rtx_code code = GET_CODE (operands[3]);
17834 bool negate = false;
17837 cop0 = operands[4];
17838 cop1 = operands[5];
17840 /* XOP supports all of the comparisons on all vector int types. */
17843 /* Canonicalize the comparison to EQ, GT, GTU. */
17854 code = reverse_condition (code);
17860 code = reverse_condition (code);
17866 code = swap_condition (code);
17867 x = cop0, cop0 = cop1, cop1 = x;
17871 gcc_unreachable ();
17874 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17875 if (mode == V2DImode)
17880 /* SSE4.1 supports EQ. */
17881 if (!TARGET_SSE4_1)
17887 /* SSE4.2 supports GT/GTU. */
17888 if (!TARGET_SSE4_2)
17893 gcc_unreachable ();
17897 /* Unsigned parallel compare is not supported by the hardware.
17898 Play some tricks to turn this into a signed comparison against 0. */
17902 cop0 = force_reg (mode, cop0);
17910 rtx (*gen_sub3) (rtx, rtx, rtx);
17912 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17914 mask = ix86_build_signbit_mask (mode, true, false);
17915 gen_sub3 = (mode == V4SImode
17916 ? gen_subv4si3 : gen_subv2di3);
17917 t1 = gen_reg_rtx (mode);
17918 emit_insn (gen_sub3 (t1, cop0, mask));
17920 t2 = gen_reg_rtx (mode);
17921 emit_insn (gen_sub3 (t2, cop1, mask));
17931 /* Perform a parallel unsigned saturating subtraction. */
17932 x = gen_reg_rtx (mode);
17933 emit_insn (gen_rtx_SET (VOIDmode, x,
17934 gen_rtx_US_MINUS (mode, cop0, cop1)));
17937 cop1 = CONST0_RTX (mode);
17943 gcc_unreachable ();
17948 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17949 operands[1+negate], operands[2-negate]);
17951 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17952 operands[2-negate]);
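/* Worked example of the unsigned-compare trick above (a sketch, assuming
   mode == V4SImode): the hardware only has signed pcmpgtd, but

     x >u y   <==>   (x - 0x80000000) >s (y - 0x80000000)

   so subtracting the sign-bit mask from both operands (the gen_sub3 calls)
   turns GTU into a plain GT that pcmpgtd can evaluate.  The narrower modes
   instead use the unsigned saturating subtract shown above and test its
   result against zero: x >u y exactly when (x -us y) != 0.  */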
17956 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17957 true if we should do zero extension, else sign extension. HIGH_P is
17958 true if we want the N/2 high elements, else the low elements. */
17961 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17963 enum machine_mode imode = GET_MODE (operands[1]);
17964 rtx (*unpack)(rtx, rtx, rtx);
17971 unpack = gen_vec_interleave_highv16qi;
17973 unpack = gen_vec_interleave_lowv16qi;
17977 unpack = gen_vec_interleave_highv8hi;
17979 unpack = gen_vec_interleave_lowv8hi;
17983 unpack = gen_vec_interleave_highv4si;
17985 unpack = gen_vec_interleave_lowv4si;
17988 gcc_unreachable ();
17991 dest = gen_lowpart (imode, operands[0]);
17994 se = force_reg (imode, CONST0_RTX (imode));
17996 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17997 operands[1], pc_rtx, pc_rtx);
17999 emit_insn (unpack (dest, operands[1], se));
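/* Sketch of the pre-SSE4 idiom used above: punpckl/punpckh interleave the
   source with a "second operand" SE.  For zero extension SE is a zero
   register; for sign extension it is a mask of sign-bit copies computed as
   pcmpgt (0, src), i.e. all-ones exactly in the lanes where the source is
   negative.  The interleave then yields the widened elements directly.  */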
18002 /* This function performs the same task as ix86_expand_sse_unpack,
18003 but with SSE4.1 instructions. */
18006 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18008 enum machine_mode imode = GET_MODE (operands[1]);
18009 rtx (*unpack)(rtx, rtx);
18016 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18018 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18022 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18024 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18028 unpack = gen_sse4_1_zero_extendv2siv2di2;
18030 unpack = gen_sse4_1_sign_extendv2siv2di2;
18033 gcc_unreachable ();
18036 dest = operands[0];
18039 /* Shift higher 8 bytes to lower 8 bytes. */
18040 src = gen_reg_rtx (imode);
18041 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18042 gen_lowpart (V1TImode, operands[1]),
18048 emit_insn (unpack (dest, src));
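/* Assumed typical output for the SSE4.1 variant: unpacking the low half of
   a V16QImode vector to V8HImode is a single pmovzxbw or pmovsxbw.  There
   is no high-half form of those instructions, so the code above first
   shifts the upper 8 bytes down with psrldq $8 and extends the result of
   that shift instead.  */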
18051 /* Expand conditional increment or decrement using adc/sbb instructions.
18052 The default case using setcc followed by the conditional move can be
18053 done by generic code. */
18055 ix86_expand_int_addcc (rtx operands[])
18057 enum rtx_code code = GET_CODE (operands[1]);
18059 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18061 rtx val = const0_rtx;
18062 bool fpcmp = false;
18063 enum machine_mode mode;
18064 rtx op0 = XEXP (operands[1], 0);
18065 rtx op1 = XEXP (operands[1], 1);
18067 if (operands[3] != const1_rtx
18068 && operands[3] != constm1_rtx)
18070 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18072 code = GET_CODE (compare_op);
18074 flags = XEXP (compare_op, 0);
18076 if (GET_MODE (flags) == CCFPmode
18077 || GET_MODE (flags) == CCFPUmode)
18080 code = ix86_fp_compare_code_to_integer (code);
18087 PUT_CODE (compare_op,
18088 reverse_condition_maybe_unordered
18089 (GET_CODE (compare_op)));
18091 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18094 mode = GET_MODE (operands[0]);
18096 /* Construct either adc or sbb insn. */
18097 if ((code == LTU) == (operands[3] == constm1_rtx))
18102 insn = gen_subqi3_carry;
18105 insn = gen_subhi3_carry;
18108 insn = gen_subsi3_carry;
18111 insn = gen_subdi3_carry;
18114 gcc_unreachable ();
18122 insn = gen_addqi3_carry;
18125 insn = gen_addhi3_carry;
18128 insn = gen_addsi3_carry;
18131 insn = gen_adddi3_carry;
18134 gcc_unreachable ();
18137 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
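/* Illustrative example (an assumption about the source pattern): for
   unsigned a and b, the conditional increment

     if (a < b) x++;

   is handled here without a branch.  ix86_expand_carry_flag_compare turns
   the test into a carry-setting cmp, and the increment becomes

     cmpl %ebx, %eax        ; CF = (a < b)
     adcl $0, %ecx          ; x += CF

   with the sbb forms used instead when operands[3] is -1.  */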
18143 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18144 but works for floating point parameters and non-offsettable memories.
18145 For pushes, it returns just stack offsets; the values will be saved
18146 in the right order. At most four parts are generated. */
18149 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18154 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18156 size = (GET_MODE_SIZE (mode) + 4) / 8;
18158 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18159 gcc_assert (size >= 2 && size <= 4);
18161 /* Optimize constant pool references to immediates. This is used by fp
18162 moves, which force all constants to memory to allow combining. */
18163 if (MEM_P (operand) && MEM_READONLY_P (operand))
18165 rtx tmp = maybe_get_pool_constant (operand);
18170 if (MEM_P (operand) && !offsettable_memref_p (operand))
18172 /* The only non-offsettable memories we handle are pushes. */
18173 int ok = push_operand (operand, VOIDmode);
18177 operand = copy_rtx (operand);
18178 PUT_MODE (operand, Pmode);
18179 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18183 if (GET_CODE (operand) == CONST_VECTOR)
18185 enum machine_mode imode = int_mode_for_mode (mode);
18186 /* Caution: if we looked through a constant pool memory above,
18187 the operand may actually have a different mode now. That's
18188 ok, since we want to pun this all the way back to an integer. */
18189 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18190 gcc_assert (operand != NULL);
18196 if (mode == DImode)
18197 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18202 if (REG_P (operand))
18204 gcc_assert (reload_completed);
18205 for (i = 0; i < size; i++)
18206 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18208 else if (offsettable_memref_p (operand))
18210 operand = adjust_address (operand, SImode, 0);
18211 parts[0] = operand;
18212 for (i = 1; i < size; i++)
18213 parts[i] = adjust_address (operand, SImode, 4 * i);
18215 else if (GET_CODE (operand) == CONST_DOUBLE)
18220 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18224 real_to_target (l, &r, mode);
18225 parts[3] = gen_int_mode (l[3], SImode);
18226 parts[2] = gen_int_mode (l[2], SImode);
18229 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18230 parts[2] = gen_int_mode (l[2], SImode);
18233 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18236 gcc_unreachable ();
18238 parts[1] = gen_int_mode (l[1], SImode);
18239 parts[0] = gen_int_mode (l[0], SImode);
18242 gcc_unreachable ();
18247 if (mode == TImode)
18248 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18249 if (mode == XFmode || mode == TFmode)
18251 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
18252 if (REG_P (operand))
18254 gcc_assert (reload_completed);
18255 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18256 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18258 else if (offsettable_memref_p (operand))
18260 operand = adjust_address (operand, DImode, 0);
18261 parts[0] = operand;
18262 parts[1] = adjust_address (operand, upper_mode, 8);
18264 else if (GET_CODE (operand) == CONST_DOUBLE)
18269 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18270 real_to_target (l, &r, mode);
18272 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18273 if (HOST_BITS_PER_WIDE_INT >= 64)
18276 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18277 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18280 parts[0] = immed_double_const (l[0], l[1], DImode);
18282 if (upper_mode == SImode)
18283 parts[1] = gen_int_mode (l[2], SImode);
18284 else if (HOST_BITS_PER_WIDE_INT >= 64)
18287 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18288 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18291 parts[1] = immed_double_const (l[2], l[3], DImode);
18294 gcc_unreachable ();
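/* Example of the CONST_DOUBLE path above (a sketch): on ia32 the DFmode
   constant 1.0 is punned to its IEEE-754 image 0x3ff0000000000000 and
   returned as two SImode immediates,

     parts[0] = 0x00000000   (low word)
     parts[1] = 0x3ff00000   (high word)

   so the caller can emit two ordinary 32-bit immediate moves.  */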
18301 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18302 Return false when normal moves are needed; true when all required
18303 insns have been emitted. Operands 2-4 contain the input values
18304 in the correct order; operands 5-7 contain the output values. */
18307 ix86_split_long_move (rtx operands[])
18312 int collisions = 0;
18313 enum machine_mode mode = GET_MODE (operands[0]);
18314 bool collisionparts[4];
18316 /* The DFmode expanders may ask us to move double.
18317 For a 64-bit target this is a single move. By hiding the fact
18318 here we simplify i386.md splitters. */
18319 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18321 /* Optimize constant pool references to immediates. This is used by
18322 fp moves, which force all constants to memory to allow combining. */
18324 if (MEM_P (operands[1])
18325 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18326 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18327 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18328 if (push_operand (operands[0], VOIDmode))
18330 operands[0] = copy_rtx (operands[0]);
18331 PUT_MODE (operands[0], Pmode);
18334 operands[0] = gen_lowpart (DImode, operands[0]);
18335 operands[1] = gen_lowpart (DImode, operands[1]);
18336 emit_move_insn (operands[0], operands[1]);
18340 /* The only non-offsettable memory we handle is push. */
18341 if (push_operand (operands[0], VOIDmode))
18344 gcc_assert (!MEM_P (operands[0])
18345 || offsettable_memref_p (operands[0]));
18347 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18348 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18350 /* When emitting a push, take care with source operands on the stack. */
18351 if (push && MEM_P (operands[1])
18352 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18354 rtx src_base = XEXP (part[1][nparts - 1], 0);
18356 /* Compensate for the stack decrement by 4. */
18357 if (!TARGET_64BIT && nparts == 3
18358 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18359 src_base = plus_constant (src_base, 4);
18361 /* src_base refers to the stack pointer and is
18362 automatically decreased by emitted push. */
18363 for (i = 0; i < nparts; i++)
18364 part[1][i] = change_address (part[1][i],
18365 GET_MODE (part[1][i]), src_base);
18368 /* We need to do the copy in the right order in case an address register
18369 of the source overlaps the destination. */
18370 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18374 for (i = 0; i < nparts; i++)
18377 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18378 if (collisionparts[i])
18382 /* Collision in the middle part can be handled by reordering. */
18383 if (collisions == 1 && nparts == 3 && collisionparts [1])
18385 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18386 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18388 else if (collisions == 1
18390 && (collisionparts [1] || collisionparts [2]))
18392 if (collisionparts [1])
18394 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18395 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18399 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18400 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18404 /* If there are more collisions, we can't handle it by reordering.
18405 Do an lea to the last part and use only one colliding move. */
18406 else if (collisions > 1)
18412 base = part[0][nparts - 1];
18414 /* Handle the case when the last part isn't valid for lea.
18415 This happens in 64-bit mode when storing the 12-byte XFmode value. */
18416 if (GET_MODE (base) != Pmode)
18417 base = gen_rtx_REG (Pmode, REGNO (base));
18419 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18420 part[1][0] = replace_equiv_address (part[1][0], base);
18421 for (i = 1; i < nparts; i++)
18423 tmp = plus_constant (base, UNITS_PER_WORD * i);
18424 part[1][i] = replace_equiv_address (part[1][i], tmp);
18435 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18436 emit_insn (gen_addsi3 (stack_pointer_rtx,
18437 stack_pointer_rtx, GEN_INT (-4)));
18438 emit_move_insn (part[0][2], part[1][2]);
18440 else if (nparts == 4)
18442 emit_move_insn (part[0][3], part[1][3]);
18443 emit_move_insn (part[0][2], part[1][2]);
18448 /* In 64-bit mode we don't have a 32-bit push available. In case this is
18449 a register, it is OK - we will just use the larger counterpart. We also
18450 retype memory - these come from an attempt to avoid a REX prefix on
18451 moving the second half of a TFmode value. */
18452 if (GET_MODE (part[1][1]) == SImode)
18454 switch (GET_CODE (part[1][1]))
18457 part[1][1] = adjust_address (part[1][1], DImode, 0);
18461 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18465 gcc_unreachable ();
18468 if (GET_MODE (part[1][0]) == SImode)
18469 part[1][0] = part[1][1];
18472 emit_move_insn (part[0][1], part[1][1]);
18473 emit_move_insn (part[0][0], part[1][0]);
18477 /* Choose the correct order so as not to overwrite the source before it is copied. */
18478 if ((REG_P (part[0][0])
18479 && REG_P (part[1][1])
18480 && (REGNO (part[0][0]) == REGNO (part[1][1])
18482 && REGNO (part[0][0]) == REGNO (part[1][2]))
18484 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18486 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18488 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18490 operands[2 + i] = part[0][j];
18491 operands[6 + i] = part[1][j];
18496 for (i = 0; i < nparts; i++)
18498 operands[2 + i] = part[0][i];
18499 operands[6 + i] = part[1][i];
18503 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18504 if (optimize_insn_for_size_p ())
18506 for (j = 0; j < nparts - 1; j++)
18507 if (CONST_INT_P (operands[6 + j])
18508 && operands[6 + j] != const0_rtx
18509 && REG_P (operands[2 + j]))
18510 for (i = j; i < nparts - 1; i++)
18511 if (CONST_INT_P (operands[7 + i])
18512 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18513 operands[7 + i] = operands[2 + j];
18516 for (i = 0; i < nparts; i++)
18517 emit_move_insn (operands[2 + i], operands[6 + i]);
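/* Minimal sketch of the collision handling (assumed, for illustration):
   loading a DImode value through an address held in %eax into the pair
   %eax:%edx must load the %eax-overlapping part last,

     movl 4(%eax), %edx
     movl (%eax), %eax

   which is what the reversed i/j ordering above arranges; the lea fallback
   covers the cases reordering alone cannot fix.  */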
18522 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18523 left shift by a constant, either using a single shift or
18524 a sequence of add instructions. */
18527 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18529 rtx (*insn)(rtx, rtx, rtx);
18532 || (count * ix86_cost->add <= ix86_cost->shift_const
18533 && !optimize_insn_for_size_p ()))
18535 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
18536 while (count-- > 0)
18537 emit_insn (insn (operand, operand, operand));
18541 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18542 emit_insn (insn (operand, operand, GEN_INT (count)));
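/* For example (illustrative only): when the cost tables say the adds are
   cheaper and we are not optimizing for size, "x <<= 2" on an SImode half
   is emitted as

     addl %eax, %eax
     addl %eax, %eax

   instead of "sall $2, %eax".  */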
18547 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18549 rtx (*gen_ashl3)(rtx, rtx, rtx);
18550 rtx (*gen_shld)(rtx, rtx, rtx);
18551 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18553 rtx low[2], high[2];
18556 if (CONST_INT_P (operands[2]))
18558 split_double_mode (mode, operands, 2, low, high);
18559 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18561 if (count >= half_width)
18563 emit_move_insn (high[0], low[1]);
18564 emit_move_insn (low[0], const0_rtx);
18566 if (count > half_width)
18567 ix86_expand_ashl_const (high[0], count - half_width, mode);
18571 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18573 if (!rtx_equal_p (operands[0], operands[1]))
18574 emit_move_insn (operands[0], operands[1]);
18576 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
18577 ix86_expand_ashl_const (low[0], count, mode);
18582 split_double_mode (mode, operands, 1, low, high);
18584 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18586 if (operands[1] == const1_rtx)
18588 /* Assuming we've chosen QImode-capable registers, 1 << N
18589 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18590 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18592 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18594 ix86_expand_clear (low[0]);
18595 ix86_expand_clear (high[0]);
18596 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
18598 d = gen_lowpart (QImode, low[0]);
18599 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18600 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18601 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18603 d = gen_lowpart (QImode, high[0]);
18604 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18605 s = gen_rtx_NE (QImode, flags, const0_rtx);
18606 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18609 /* Otherwise, we can get the same results by manually performing
18610 a bit extract operation on bit 5/6, and then performing the two
18611 shifts. The two methods of getting 0/1 into low/high are exactly
18612 the same size. Avoiding the shift in the bit extract case helps
18613 pentium4 a bit; no one else seems to care much either way. */
18616 enum machine_mode half_mode;
18617 rtx (*gen_lshr3)(rtx, rtx, rtx);
18618 rtx (*gen_and3)(rtx, rtx, rtx);
18619 rtx (*gen_xor3)(rtx, rtx, rtx);
18620 HOST_WIDE_INT bits;
18623 if (mode == DImode)
18625 half_mode = SImode;
18626 gen_lshr3 = gen_lshrsi3;
18627 gen_and3 = gen_andsi3;
18628 gen_xor3 = gen_xorsi3;
18633 half_mode = DImode;
18634 gen_lshr3 = gen_lshrdi3;
18635 gen_and3 = gen_anddi3;
18636 gen_xor3 = gen_xordi3;
18640 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18641 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
18643 x = gen_lowpart (half_mode, operands[2]);
18644 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18646 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
18647 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
18648 emit_move_insn (low[0], high[0]);
18649 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
18652 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18653 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
18657 if (operands[1] == constm1_rtx)
18659 /* For -1 << N, we can avoid the shld instruction, because we
18660 know that we're shifting 0...31/63 ones into a -1. */
18661 emit_move_insn (low[0], constm1_rtx);
18662 if (optimize_insn_for_size_p ())
18663 emit_move_insn (high[0], low[0]);
18665 emit_move_insn (high[0], constm1_rtx);
18669 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18671 if (!rtx_equal_p (operands[0], operands[1]))
18672 emit_move_insn (operands[0], operands[1]);
18674 split_double_mode (mode, operands, 1, low, high);
18675 emit_insn (gen_shld (high[0], low[0], operands[2]));
18678 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18680 if (TARGET_CMOVE && scratch)
18682 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18683 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18685 ix86_expand_clear (scratch);
18686 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
18690 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18691 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18693 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
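/* Rough shape of the variable 64-bit shift on ia32 (a sketch assuming
   TARGET_CMOVE and a scratch register; the exact patterns live in i386.md):

     shldl %cl, %eax, %edx   ; high = high:low << (count mod 32)
     sall  %cl, %eax         ; low <<= (count mod 32)
     xorl  %ebx, %ebx        ; scratch = 0
     testb $32, %cl          ; count >= 32?
     cmovnel %eax, %edx      ;   then high = low
     cmovnel %ebx, %eax      ;        low = 0

   The shift_adj_2/adj_3 variants do the same fixup with a branch when
   cmov is unavailable.  */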
18698 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18700 rtx (*gen_ashr3)(rtx, rtx, rtx)
18701 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
18702 rtx (*gen_shrd)(rtx, rtx, rtx);
18703 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18705 rtx low[2], high[2];
18708 if (CONST_INT_P (operands[2]))
18710 split_double_mode (mode, operands, 2, low, high);
18711 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18713 if (count == GET_MODE_BITSIZE (mode) - 1)
18715 emit_move_insn (high[0], high[1]);
18716 emit_insn (gen_ashr3 (high[0], high[0],
18717 GEN_INT (half_width - 1)));
18718 emit_move_insn (low[0], high[0]);
18721 else if (count >= half_width)
18723 emit_move_insn (low[0], high[1]);
18724 emit_move_insn (high[0], low[0]);
18725 emit_insn (gen_ashr3 (high[0], high[0],
18726 GEN_INT (half_width - 1)));
18728 if (count > half_width)
18729 emit_insn (gen_ashr3 (low[0], low[0],
18730 GEN_INT (count - half_width)));
18734 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18736 if (!rtx_equal_p (operands[0], operands[1]))
18737 emit_move_insn (operands[0], operands[1]);
18739 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18740 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
18745 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18747 if (!rtx_equal_p (operands[0], operands[1]))
18748 emit_move_insn (operands[0], operands[1]);
18750 split_double_mode (mode, operands, 1, low, high);
18752 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18753 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
18755 if (TARGET_CMOVE && scratch)
18757 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18758 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18760 emit_move_insn (scratch, high[0]);
18761 emit_insn (gen_ashr3 (scratch, scratch,
18762 GEN_INT (half_width - 1)));
18763 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18768 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
18769 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
18771 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
18777 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18779 rtx (*gen_lshr3)(rtx, rtx, rtx)
18780 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
18781 rtx (*gen_shrd)(rtx, rtx, rtx);
18782 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18784 rtx low[2], high[2];
18787 if (CONST_INT_P (operands[2]))
18789 split_double_mode (mode, operands, 2, low, high);
18790 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18792 if (count >= half_width)
18794 emit_move_insn (low[0], high[1]);
18795 ix86_expand_clear (high[0]);
18797 if (count > half_width)
18798 emit_insn (gen_lshr3 (low[0], low[0],
18799 GEN_INT (count - half_width)));
18803 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18805 if (!rtx_equal_p (operands[0], operands[1]))
18806 emit_move_insn (operands[0], operands[1]);
18808 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18809 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
18814 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18816 if (!rtx_equal_p (operands[0], operands[1]))
18817 emit_move_insn (operands[0], operands[1]);
18819 split_double_mode (mode, operands, 1, low, high);
18821 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18822 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
18824 if (TARGET_CMOVE && scratch)
18826 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18827 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18829 ix86_expand_clear (scratch);
18830 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18835 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18836 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18838 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
18843 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18845 predict_jump (int prob)
18847 rtx insn = get_last_insn ();
18848 gcc_assert (JUMP_P (insn));
18849 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18852 /* Helper function for the string operations below. Test whether VARIABLE
18853 is aligned to VALUE bytes. If true, jump to the label. */
18855 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18857 rtx label = gen_label_rtx ();
18858 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18859 if (GET_MODE (variable) == DImode)
18860 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18862 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18863 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18866 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18868 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18872 /* Adjust COUNTER by the VALUE. */
18874 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18876 rtx (*gen_add)(rtx, rtx, rtx)
18877 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
18879 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
18882 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
18884 ix86_zero_extend_to_Pmode (rtx exp)
18887 if (GET_MODE (exp) == VOIDmode)
18888 return force_reg (Pmode, exp);
18889 if (GET_MODE (exp) == Pmode)
18890 return copy_to_mode_reg (Pmode, exp);
18891 r = gen_reg_rtx (Pmode);
18892 emit_insn (gen_zero_extendsidi2 (r, exp));
18896 /* Divide COUNTREG by SCALE. */
18898 scale_counter (rtx countreg, int scale)
18904 if (CONST_INT_P (countreg))
18905 return GEN_INT (INTVAL (countreg) / scale);
18906 gcc_assert (REG_P (countreg));
18908 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18909 GEN_INT (exact_log2 (scale)),
18910 NULL, 1, OPTAB_DIRECT);
18914 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18915 DImode for constant loop counts. */
18917 static enum machine_mode
18918 counter_mode (rtx count_exp)
18920 if (GET_MODE (count_exp) != VOIDmode)
18921 return GET_MODE (count_exp);
18922 if (!CONST_INT_P (count_exp))
18924 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18929 /* When SRCPTR is non-NULL, output a simple loop to move memory
18930 from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
18931 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
18932 equivalent loop to set memory by VALUE (supposed to be in MODE).
18934 The size is rounded down to a whole number of chunks moved at once.
18935 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
18939 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18940 rtx destptr, rtx srcptr, rtx value,
18941 rtx count, enum machine_mode mode, int unroll,
18944 rtx out_label, top_label, iter, tmp;
18945 enum machine_mode iter_mode = counter_mode (count);
18946 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18947 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18953 top_label = gen_label_rtx ();
18954 out_label = gen_label_rtx ();
18955 iter = gen_reg_rtx (iter_mode);
18957 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18958 NULL, 1, OPTAB_DIRECT);
18959 /* Those two should combine. */
18960 if (piece_size == const1_rtx)
18962 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18964 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18966 emit_move_insn (iter, const0_rtx);
18968 emit_label (top_label);
18970 tmp = convert_modes (Pmode, iter_mode, iter, true);
18971 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18972 destmem = change_address (destmem, mode, x_addr);
18976 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18977 srcmem = change_address (srcmem, mode, y_addr);
18979 /* When unrolling for chips that reorder memory reads and writes,
18980 we can save registers by using a single temporary.
18981 Also, using 4 temporaries is overkill in 32-bit mode. */
18982 if (!TARGET_64BIT && 0)
18984 for (i = 0; i < unroll; i++)
18989 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18991 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18993 emit_move_insn (destmem, srcmem);
18999 gcc_assert (unroll <= 4);
19000 for (i = 0; i < unroll; i++)
19002 tmpreg[i] = gen_reg_rtx (mode);
19006 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19008 emit_move_insn (tmpreg[i], srcmem);
19010 for (i = 0; i < unroll; i++)
19015 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19017 emit_move_insn (destmem, tmpreg[i]);
19022 for (i = 0; i < unroll; i++)
19026 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19027 emit_move_insn (destmem, value);
19030 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19031 true, OPTAB_LIB_WIDEN);
19033 emit_move_insn (iter, tmp);
19035 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19037 if (expected_size != -1)
19039 expected_size /= GET_MODE_SIZE (mode) * unroll;
19040 if (expected_size == 0)
19042 else if (expected_size > REG_BR_PROB_BASE)
19043 predict_jump (REG_BR_PROB_BASE - 1);
19045 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19048 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19049 iter = ix86_zero_extend_to_Pmode (iter);
19050 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19051 true, OPTAB_LIB_WIDEN);
19052 if (tmp != destptr)
19053 emit_move_insn (destptr, tmp);
19056 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19057 true, OPTAB_LIB_WIDEN);
19059 emit_move_insn (srcptr, tmp);
19061 emit_label (out_label);
19064 /* Output "rep; mov" instruction.
19065 Arguments have the same meaning as for the previous function. */
19067 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19068 rtx destptr, rtx srcptr,
19070 enum machine_mode mode)
19076 /* If the size is known, it is shorter to use rep movs. */
19077 if (mode == QImode && CONST_INT_P (count)
19078 && !(INTVAL (count) & 3))
19081 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19082 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19083 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19084 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19085 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19086 if (mode != QImode)
19088 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19089 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19090 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19091 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19092 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19093 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19097 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19098 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19100 if (CONST_INT_P (count))
19102 count = GEN_INT (INTVAL (count)
19103 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19104 destmem = shallow_copy_rtx (destmem);
19105 srcmem = shallow_copy_rtx (srcmem);
19106 set_mem_size (destmem, count);
19107 set_mem_size (srcmem, count);
19111 if (MEM_SIZE (destmem))
19112 set_mem_size (destmem, NULL_RTX);
19113 if (MEM_SIZE (srcmem))
19114 set_mem_size (srcmem, NULL_RTX);
19116 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
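/* Assumed typical output: for mode == SImode and a known count this boils
   down to

     movl $count/4, %ecx
     rep movsl

   COUNTREG holds the byte count divided by the chunk size, and
   DESTEXP/SRCEXP describe the final pointer values (ptr + (count << 2))
   so the RTL correctly models the side effects on edi/esi.  */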
19120 /* Output "rep; stos" instruction.
19121 Arguments have the same meaning as for the previous function. */
19123 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19124 rtx count, enum machine_mode mode,
19130 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19131 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19132 value = force_reg (mode, gen_lowpart (mode, value));
19133 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19134 if (mode != QImode)
19136 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19137 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19138 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19141 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19142 if (orig_value == const0_rtx && CONST_INT_P (count))
19144 count = GEN_INT (INTVAL (count)
19145 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19146 destmem = shallow_copy_rtx (destmem);
19147 set_mem_size (destmem, count);
19149 else if (MEM_SIZE (destmem))
19150 set_mem_size (destmem, NULL_RTX);
19151 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19155 emit_strmov (rtx destmem, rtx srcmem,
19156 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19158 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19159 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19160 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19163 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19165 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19166 rtx destptr, rtx srcptr, rtx count, int max_size)
19169 if (CONST_INT_P (count))
19171 HOST_WIDE_INT countval = INTVAL (count);
19174 if ((countval & 0x10) && max_size > 16)
19178 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19179 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19182 gcc_unreachable ();
19185 if ((countval & 0x08) && max_size > 8)
19188 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19191 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19192 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19196 if ((countval & 0x04) && max_size > 4)
19198 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19201 if ((countval & 0x02) && max_size > 2)
19203 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19206 if ((countval & 0x01) && max_size > 1)
19208 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19215 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19216 count, 1, OPTAB_DIRECT);
19217 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19218 count, QImode, 1, 4);
19222 /* When single stringop insns are available, we can cheaply advance the
19223 dest and src pointers. Otherwise we save code size by maintaining an offset
19224 (zero is readily available from the preceding rep operation) and using x86 addressing modes. */
19226 if (TARGET_SINGLE_STRINGOP)
19230 rtx label = ix86_expand_aligntest (count, 4, true);
19231 src = change_address (srcmem, SImode, srcptr);
19232 dest = change_address (destmem, SImode, destptr);
19233 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19234 emit_label (label);
19235 LABEL_NUSES (label) = 1;
19239 rtx label = ix86_expand_aligntest (count, 2, true);
19240 src = change_address (srcmem, HImode, srcptr);
19241 dest = change_address (destmem, HImode, destptr);
19242 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19243 emit_label (label);
19244 LABEL_NUSES (label) = 1;
19248 rtx label = ix86_expand_aligntest (count, 1, true);
19249 src = change_address (srcmem, QImode, srcptr);
19250 dest = change_address (destmem, QImode, destptr);
19251 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19252 emit_label (label);
19253 LABEL_NUSES (label) = 1;
19258 rtx offset = force_reg (Pmode, const0_rtx);
19263 rtx label = ix86_expand_aligntest (count, 4, true);
19264 src = change_address (srcmem, SImode, srcptr);
19265 dest = change_address (destmem, SImode, destptr);
19266 emit_move_insn (dest, src);
19267 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19268 true, OPTAB_LIB_WIDEN);
19270 emit_move_insn (offset, tmp);
19271 emit_label (label);
19272 LABEL_NUSES (label) = 1;
19276 rtx label = ix86_expand_aligntest (count, 2, true);
19277 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19278 src = change_address (srcmem, HImode, tmp);
19279 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19280 dest = change_address (destmem, HImode, tmp);
19281 emit_move_insn (dest, src);
19282 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19283 true, OPTAB_LIB_WIDEN);
19285 emit_move_insn (offset, tmp);
19286 emit_label (label);
19287 LABEL_NUSES (label) = 1;
19291 rtx label = ix86_expand_aligntest (count, 1, true);
19292 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19293 src = change_address (srcmem, QImode, tmp);
19294 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19295 dest = change_address (destmem, QImode, tmp);
19296 emit_move_insn (dest, src);
19297 emit_label (label);
19298 LABEL_NUSES (label) = 1;
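/* Example of the constant-count path above (illustrative): for a residue
   of countval == 7 with max_size == 8, the epilogue emits one SImode, one
   HImode and one QImode copy at offsets 0, 4 and 6 respectively - seven
   bytes, no loop, no branches.  */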
19303 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19305 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19306 rtx count, int max_size)
19309 expand_simple_binop (counter_mode (count), AND, count,
19310 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19311 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19312 gen_lowpart (QImode, value), count, QImode,
19316 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19318 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19322 if (CONST_INT_P (count))
19324 HOST_WIDE_INT countval = INTVAL (count);
19327 if ((countval & 0x10) && max_size > 16)
19331 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19332 emit_insn (gen_strset (destptr, dest, value));
19333 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19334 emit_insn (gen_strset (destptr, dest, value));
19337 gcc_unreachable ();
19340 if ((countval & 0x08) && max_size > 8)
19344 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19345 emit_insn (gen_strset (destptr, dest, value));
19349 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19350 emit_insn (gen_strset (destptr, dest, value));
19351 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19352 emit_insn (gen_strset (destptr, dest, value));
19356 if ((countval & 0x04) && max_size > 4)
19358 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19359 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19362 if ((countval & 0x02) && max_size > 2)
19364 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19365 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19368 if ((countval & 0x01) && max_size > 1)
19370 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19371 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19378 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19383 rtx label = ix86_expand_aligntest (count, 16, true);
19386 dest = change_address (destmem, DImode, destptr);
19387 emit_insn (gen_strset (destptr, dest, value));
19388 emit_insn (gen_strset (destptr, dest, value));
19392 dest = change_address (destmem, SImode, destptr);
19393 emit_insn (gen_strset (destptr, dest, value));
19394 emit_insn (gen_strset (destptr, dest, value));
19395 emit_insn (gen_strset (destptr, dest, value));
19396 emit_insn (gen_strset (destptr, dest, value));
19398 emit_label (label);
19399 LABEL_NUSES (label) = 1;
19403 rtx label = ix86_expand_aligntest (count, 8, true);
19406 dest = change_address (destmem, DImode, destptr);
19407 emit_insn (gen_strset (destptr, dest, value));
19411 dest = change_address (destmem, SImode, destptr);
19412 emit_insn (gen_strset (destptr, dest, value));
19413 emit_insn (gen_strset (destptr, dest, value));
19415 emit_label (label);
19416 LABEL_NUSES (label) = 1;
19420 rtx label = ix86_expand_aligntest (count, 4, true);
19421 dest = change_address (destmem, SImode, destptr);
19422 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19423 emit_label (label);
19424 LABEL_NUSES (label) = 1;
19428 rtx label = ix86_expand_aligntest (count, 2, true);
19429 dest = change_address (destmem, HImode, destptr);
19430 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19431 emit_label (label);
19432 LABEL_NUSES (label) = 1;
19436 rtx label = ix86_expand_aligntest (count, 1, true);
19437 dest = change_address (destmem, QImode, destptr);
19438 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19439 emit_label (label);
19440 LABEL_NUSES (label) = 1;
19444 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
19445 to DESIRED_ALIGNMENT. */
19447 expand_movmem_prologue (rtx destmem, rtx srcmem,
19448 rtx destptr, rtx srcptr, rtx count,
19449 int align, int desired_alignment)
19451 if (align <= 1 && desired_alignment > 1)
19453 rtx label = ix86_expand_aligntest (destptr, 1, false);
19454 srcmem = change_address (srcmem, QImode, srcptr);
19455 destmem = change_address (destmem, QImode, destptr);
19456 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19457 ix86_adjust_counter (count, 1);
19458 emit_label (label);
19459 LABEL_NUSES (label) = 1;
19461 if (align <= 2 && desired_alignment > 2)
19463 rtx label = ix86_expand_aligntest (destptr, 2, false);
19464 srcmem = change_address (srcmem, HImode, srcptr);
19465 destmem = change_address (destmem, HImode, destptr);
19466 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19467 ix86_adjust_counter (count, 2);
19468 emit_label (label);
19469 LABEL_NUSES (label) = 1;
19471 if (align <= 4 && desired_alignment > 4)
19473 rtx label = ix86_expand_aligntest (destptr, 4, false);
19474 srcmem = change_address (srcmem, SImode, srcptr);
19475 destmem = change_address (destmem, SImode, destptr);
19476 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19477 ix86_adjust_counter (count, 4);
19478 emit_label (label);
19479 LABEL_NUSES (label) = 1;
19481 gcc_assert (desired_alignment <= 8);
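/* For instance (a sketch): with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the
   prologue tests the low bits of the destination pointer and conditionally
   copies 1, then 2, then 4 bytes, so at most 7 bytes are moved before the
   main loop starts on an 8-byte boundary; COUNT is decremented by each
   piece actually copied.  */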
19484 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
19485 ALIGN_BYTES is how many bytes need to be copied. */
19487 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19488 int desired_align, int align_bytes)
19491 rtx src_size, dst_size;
19493 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19494 if (src_align_bytes >= 0)
19495 src_align_bytes = desired_align - src_align_bytes;
19496 src_size = MEM_SIZE (src);
19497 dst_size = MEM_SIZE (dst);
19498 if (align_bytes & 1)
19500 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19501 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19503 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19505 if (align_bytes & 2)
19507 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19508 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19509 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19510 set_mem_align (dst, 2 * BITS_PER_UNIT);
19511 if (src_align_bytes >= 0
19512 && (src_align_bytes & 1) == (align_bytes & 1)
19513 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19514 set_mem_align (src, 2 * BITS_PER_UNIT);
19516 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19518 if (align_bytes & 4)
19520 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19521 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19522 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19523 set_mem_align (dst, 4 * BITS_PER_UNIT);
19524 if (src_align_bytes >= 0)
19526 unsigned int src_align = 0;
19527 if ((src_align_bytes & 3) == (align_bytes & 3))
19529 else if ((src_align_bytes & 1) == (align_bytes & 1))
19531 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19532 set_mem_align (src, src_align * BITS_PER_UNIT);
19535 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19537 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19538 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19539 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19540 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19541 if (src_align_bytes >= 0)
19543 unsigned int src_align = 0;
19544 if ((src_align_bytes & 7) == (align_bytes & 7))
19546 else if ((src_align_bytes & 3) == (align_bytes & 3))
19548 else if ((src_align_bytes & 1) == (align_bytes & 1))
19550 if (src_align > (unsigned int) desired_align)
19551 src_align = desired_align;
19552 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19553 set_mem_align (src, src_align * BITS_PER_UNIT);
19556 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19558 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
19563 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
19564 to DESIRED_ALIGNMENT. */
19566 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19567 int align, int desired_alignment)
19569 if (align <= 1 && desired_alignment > 1)
19571 rtx label = ix86_expand_aligntest (destptr, 1, false);
19572 destmem = change_address (destmem, QImode, destptr);
19573 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19574 ix86_adjust_counter (count, 1);
19575 emit_label (label);
19576 LABEL_NUSES (label) = 1;
19578 if (align <= 2 && desired_alignment > 2)
19580 rtx label = ix86_expand_aligntest (destptr, 2, false);
19581 destmem = change_address (destmem, HImode, destptr);
19582 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19583 ix86_adjust_counter (count, 2);
19584 emit_label (label);
19585 LABEL_NUSES (label) = 1;
19587 if (align <= 4 && desired_alignment > 4)
19589 rtx label = ix86_expand_aligntest (destptr, 4, false);
19590 destmem = change_address (destmem, SImode, destptr);
19591 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19592 ix86_adjust_counter (count, 4);
19593 emit_label (label);
19594 LABEL_NUSES (label) = 1;
19596 gcc_assert (desired_alignment <= 8);
19599 /* Store enough into DST to align DST, known to be aligned by ALIGN,
19600 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
19602 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19603 int desired_align, int align_bytes)
19606 rtx dst_size = MEM_SIZE (dst);
19607 if (align_bytes & 1)
19609 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19611 emit_insn (gen_strset (destreg, dst,
19612 gen_lowpart (QImode, value)));
19614 if (align_bytes & 2)
19616 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19617 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19618 set_mem_align (dst, 2 * BITS_PER_UNIT);
19620 emit_insn (gen_strset (destreg, dst,
19621 gen_lowpart (HImode, value)));
19623 if (align_bytes & 4)
19625 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19626 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19627 set_mem_align (dst, 4 * BITS_PER_UNIT);
19629 emit_insn (gen_strset (destreg, dst,
19630 gen_lowpart (SImode, value)));
19632 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19633 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19634 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19636 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19640 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19641 static enum stringop_alg
19642 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19643 int *dynamic_check)
19645 const struct stringop_algs * algs;
19646 bool optimize_for_speed;
19647 /* Algorithms using the rep prefix want at least edi and ecx;
19648 additionally, memset wants eax and memcpy wants esi. Don't
19649 consider such algorithms if the user has appropriated those
19650 registers for their own purposes. */
19651 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19653 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19655 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19656 || (alg != rep_prefix_1_byte \
19657 && alg != rep_prefix_4_byte \
19658 && alg != rep_prefix_8_byte))
19659 const struct processor_costs *cost;
19661 /* Even if the string operation call is cold, we still might spend a lot
19662 of time processing large blocks. */
19663 if (optimize_function_for_size_p (cfun)
19664 || (optimize_insn_for_size_p ()
19665 && expected_size != -1 && expected_size < 256))
19666 optimize_for_speed = false;
19668 optimize_for_speed = true;
19670 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19672 *dynamic_check = -1;
19674 algs = &cost->memset[TARGET_64BIT != 0];
19676 algs = &cost->memcpy[TARGET_64BIT != 0];
19677 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19678 return stringop_alg;
19679 /* rep; movq or rep; movl is the smallest variant. */
19680 else if (!optimize_for_speed)
19682 if (!count || (count & 3))
19683 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19685 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19687 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
19689 else if (expected_size != -1 && expected_size < 4)
19690 return loop_1_byte;
19691 else if (expected_size != -1)
19694 enum stringop_alg alg = libcall;
19695 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19697 /* We get here if the algorithms that were not libcall-based
19698 were rep-prefix based and we are unable to use rep prefixes
19699 based on global register usage. Break out of the loop and
19700 use the heuristic below. */
19701 if (algs->size[i].max == 0)
19703 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19705 enum stringop_alg candidate = algs->size[i].alg;
19707 if (candidate != libcall && ALG_USABLE_P (candidate))
19709 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
19710 last non-libcall inline algorithm. */
19711 if (TARGET_INLINE_ALL_STRINGOPS)
19713 /* When the current size is best copied by a libcall,
19714 but we are still forced to inline, run the heuristic below
19715 that will pick code for medium-sized blocks. */
19716 if (alg != libcall)
19720 else if (ALG_USABLE_P (candidate))
19724 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19726 /* When asked to inline the call anyway, try to pick a meaningful choice.
19727 We look for the maximal size of block that is faster to copy by hand and
19728 take blocks of at most that size, guessing that the average size will
19729 be roughly half of the block.
19731 If this turns out to be bad, we might simply specify the preferred
19732 choice in ix86_costs. */
19733 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19734 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19737 enum stringop_alg alg;
19739 bool any_alg_usable_p = true;
19741 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19743 enum stringop_alg candidate = algs->size[i].alg;
19744 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19746 if (candidate != libcall && candidate
19747 && ALG_USABLE_P (candidate))
19748 max = algs->size[i].max;
19750 /* If there aren't any usable algorithms, then recursing on
19751 smaller sizes isn't going to find anything. Just return the
19752 simple byte-at-a-time copy loop. */
19753 if (!any_alg_usable_p)
19755 /* Pick something reasonable. */
19756 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19757 *dynamic_check = 128;
19758 return loop_1_byte;
19762 alg = decide_alg (count, max / 2, memset, dynamic_check);
19763 gcc_assert (*dynamic_check == -1);
19764 gcc_assert (alg != libcall);
19765 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19766 *dynamic_check = max;
19769 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19770 #undef ALG_USABLE_P
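/* Informal summary of the decision order above (not additional logic): an
   explicit -mstringop-strategy wins outright; when optimizing for size the
   shortest encoding (rep movsb, or rep movsl for counts that are multiples
   of 4) is chosen; a known expected size walks the per-CPU stringop_algs
   cost table; and only when the table answers "libcall" while inlining is
   forced does the recursive half-size heuristic run, optionally installing
   a runtime size check via *DYNAMIC_CHECK.  */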
19773 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19774 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19776 decide_alignment (int align,
19777 enum stringop_alg alg,
19780 int desired_align = 0;
19784 gcc_unreachable ();
19786 case unrolled_loop:
19787 desired_align = GET_MODE_SIZE (Pmode);
19789 case rep_prefix_8_byte:
19792 case rep_prefix_4_byte:
19793 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19794 copying a whole cache line at once. */
19795 if (TARGET_PENTIUMPRO)
19800 case rep_prefix_1_byte:
19801 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19802 copying a whole cache line at once. */
19803 if (TARGET_PENTIUMPRO)
19817 if (desired_align < align)
19818 desired_align = align;
19819 if (expected_size != -1 && expected_size < 4)
19820 desired_align = align;
19821 return desired_align;
19824 /* Return the smallest power of 2 greater than VAL. */
19826 smallest_pow2_greater_than (int val)
19834 /* Expand string move (memcpy) operation. Use i386 string operations when
19835 profitable. expand_setmem contains similar code. The code depends upon
19836 architecture, block size and alignment, but always has the same overall structure:
19839 1) Prologue guard: Conditional that jumps up to epilogues for small
19840 blocks that can be handled by epilogue alone. This is faster but
19841 also needed for correctness, since the prologue assumes the block is larger
19842 than the desired alignment.
19844 Optional dynamic check for size and libcall for large
19845 blocks is emitted here too, with -minline-stringops-dynamically.
19847 2) Prologue: copy first few bytes in order to get destination aligned
19848 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19849 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19850 We emit either a jump tree on power of two sized blocks, or a byte loop.
19852 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19853 with specified algorithm.
19855 4) Epilogue: code copying tail of the block that is too small to be
19856 handled by main body (or up to size guarded by prologue guard). */
19858 int
19859 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19860 rtx expected_align_exp, rtx expected_size_exp)
19866 rtx jump_around_label = NULL;
19867 HOST_WIDE_INT align = 1;
19868 unsigned HOST_WIDE_INT count = 0;
19869 HOST_WIDE_INT expected_size = -1;
19870 int size_needed = 0, epilogue_size_needed;
19871 int desired_align = 0, align_bytes = 0;
19872 enum stringop_alg alg;
19874 bool need_zero_guard = false;
19876 if (CONST_INT_P (align_exp))
19877 align = INTVAL (align_exp);
19878 /* i386 can do misaligned access at reasonably increased cost.  */
19879 if (CONST_INT_P (expected_align_exp)
19880 && INTVAL (expected_align_exp) > align)
19881 align = INTVAL (expected_align_exp);
19882 /* ALIGN is the minimum of destination and source alignment, but we care here
19883 just about destination alignment. */
19884 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19885 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19887 if (CONST_INT_P (count_exp))
19888 count = expected_size = INTVAL (count_exp);
19889 if (CONST_INT_P (expected_size_exp) && count == 0)
19890 expected_size = INTVAL (expected_size_exp);
19892 /* Make sure we don't need to care about overflow later on. */
19893 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19896 /* Step 0: Decide on preferred algorithm, desired alignment and
19897 size of chunks to be copied by main loop. */
19899 alg = decide_alg (count, expected_size, false, &dynamic_check);
19900 desired_align = decide_alignment (align, alg, expected_size);
19902 if (!TARGET_ALIGN_STRINGOPS)
19903 align = desired_align;
19905 if (alg == libcall)
19907 gcc_assert (alg != no_stringop);
19909 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19910 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19911 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19916 gcc_unreachable ();
19918 need_zero_guard = true;
19919 size_needed = GET_MODE_SIZE (Pmode);
19921 case unrolled_loop:
19922 need_zero_guard = true;
19923 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19925 case rep_prefix_8_byte:
19928 case rep_prefix_4_byte:
19931 case rep_prefix_1_byte:
19935 need_zero_guard = true;
19940 epilogue_size_needed = size_needed;
19942 /* Step 1: Prologue guard. */
19944 /* Alignment code needs count to be in a register.  */
19945 if (CONST_INT_P (count_exp) && desired_align > align)
19947 if (INTVAL (count_exp) > desired_align
19948 && INTVAL (count_exp) > size_needed)
19951 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19952 if (align_bytes <= 0)
19955 align_bytes = desired_align - align_bytes;
19957 if (align_bytes == 0)
19958 count_exp = force_reg (counter_mode (count_exp), count_exp);
19960 gcc_assert (desired_align >= 1 && align >= 1);
19962 /* Ensure that alignment prologue won't copy past end of block. */
19963 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19965 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19966 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19967 Make sure it is a power of 2.  */
19968 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19972 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19974 /* If the main algorithm works on QImode, no epilogue is needed.
19975 For small sizes just don't align anything.  */
19976 if (size_needed == 1)
19977 desired_align = align;
19984 label = gen_label_rtx ();
19985 emit_cmp_and_jump_insns (count_exp,
19986 GEN_INT (epilogue_size_needed),
19987 LTU, 0, counter_mode (count_exp), 1, label);
19988 if (expected_size == -1 || expected_size < epilogue_size_needed)
19989 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19991 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19995 /* Emit code to decide at runtime whether a library call or inline code
19996 should be used.  */
19997 if (dynamic_check != -1)
19999 if (CONST_INT_P (count_exp))
20001 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20003 emit_block_move_via_libcall (dst, src, count_exp, false);
20004 count_exp = const0_rtx;
20010 rtx hot_label = gen_label_rtx ();
20011 jump_around_label = gen_label_rtx ();
20012 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20013 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20014 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20015 emit_block_move_via_libcall (dst, src, count_exp, false);
20016 emit_jump (jump_around_label);
20017 emit_label (hot_label);
20021 /* Step 2: Alignment prologue. */
20023 if (desired_align > align)
20025 if (align_bytes == 0)
20027 /* Except for the first move in epilogue, we no longer know
20028 constant offset in aliasing info.  It doesn't seem worth
20029 the pain to maintain it for the first move, so throw away
20030 the info early.  */
20031 src = change_address (src, BLKmode, srcreg);
20032 dst = change_address (dst, BLKmode, destreg);
20033 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20038 /* If we know how many bytes need to be stored before dst is
20039 sufficiently aligned, maintain aliasing info accurately. */
20040 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20041 desired_align, align_bytes);
20042 count_exp = plus_constant (count_exp, -align_bytes);
20043 count -= align_bytes;
20045 if (need_zero_guard
20046 && (count < (unsigned HOST_WIDE_INT) size_needed
20047 || (align_bytes == 0
20048 && count < ((unsigned HOST_WIDE_INT) size_needed
20049 + desired_align - align))))
20051 /* It is possible that we copied enough so the main loop will not
20052 execute.  */
20053 gcc_assert (size_needed > 1);
20054 if (label == NULL_RTX)
20055 label = gen_label_rtx ();
20056 emit_cmp_and_jump_insns (count_exp,
20057 GEN_INT (size_needed),
20058 LTU, 0, counter_mode (count_exp), 1, label);
20059 if (expected_size == -1
20060 || expected_size < (desired_align - align) / 2 + size_needed)
20061 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20063 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20066 if (label && size_needed == 1)
20068 emit_label (label);
20069 LABEL_NUSES (label) = 1;
20071 epilogue_size_needed = 1;
20073 else if (label == NULL_RTX)
20074 epilogue_size_needed = size_needed;
20076 /* Step 3: Main loop. */
20082 gcc_unreachable ();
20084 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20085 count_exp, QImode, 1, expected_size);
20088 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20089 count_exp, Pmode, 1, expected_size);
20091 case unrolled_loop:
20092 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
20093 registers for 4 temporaries anyway.  */
20094 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20095 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20098 case rep_prefix_8_byte:
20099 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20102 case rep_prefix_4_byte:
20103 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20106 case rep_prefix_1_byte:
20107 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20111 /* Properly adjust the offsets of the src and dest memory for aliasing.  */
20112 if (CONST_INT_P (count_exp))
20114 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20115 (count / size_needed) * size_needed);
20116 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20117 (count / size_needed) * size_needed);
20121 src = change_address (src, BLKmode, srcreg);
20122 dst = change_address (dst, BLKmode, destreg);
20125 /* Step 4: Epilogue to copy the remaining bytes. */
20129 /* When the main loop is done, COUNT_EXP might hold the original count,
20130 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20131 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20132 bytes.  Compensate if needed.  */
20134 if (size_needed < epilogue_size_needed)
20137 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20138 GEN_INT (size_needed - 1), count_exp, 1,
20140 if (tmp != count_exp)
20141 emit_move_insn (count_exp, tmp);
20143 emit_label (label);
20144 LABEL_NUSES (label) = 1;
20147 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20148 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20149 epilogue_size_needed);
20150 if (jump_around_label)
20151 emit_label (jump_around_label);
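/* Illustrative sketch (not part of GCC): the four-step structure that
   ix86_expand_movmem above emits as RTL, written as plain C for a
   word-at-a-time main loop.  Names are hypothetical and the dynamic
   libcall check of step 1 is omitted.  */

#include <stddef.h>
#include <string.h>

static void
sketch_movmem (char *dst, const char *src, size_t count)
{
  size_t i = 0;

  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (count >= sizeof (unsigned long))
    {
      /* 2) Alignment prologue: copy bytes until DST is aligned.  */
      while (((size_t) (dst + i)) % sizeof (unsigned long) != 0)
	{
	  dst[i] = src[i];
	  i++;
	}
      /* 3) Main body: copy SIZE_NEEDED-byte (here, word-sized) chunks.  */
      for (; count - i >= sizeof (unsigned long); i += sizeof (unsigned long))
	memcpy (dst + i, src + i, sizeof (unsigned long));
    }
  /* 4) Epilogue: copy the tail that is too small for the main body.  */
  for (; i < count; i++)
    dst[i] = src[i];
}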
20155 /* Helper function for memset.  For a QImode value 0xXY produce
20156 0xXYXYXYXY of the width specified by MODE.  This is essentially
20157 a multiplication by 0x01010101, but we can do slightly better than
20158 synth_mult by unwinding the sequence by hand on CPUs with
20159 slow multiply.  */
20160 static rtx
20161 promote_duplicated_reg (enum machine_mode mode, rtx val)
20163 enum machine_mode valmode = GET_MODE (val);
20165 int nops = mode == DImode ? 3 : 2;
20167 gcc_assert (mode == SImode || mode == DImode);
20168 if (val == const0_rtx)
20169 return copy_to_mode_reg (mode, const0_rtx);
20170 if (CONST_INT_P (val))
20172 HOST_WIDE_INT v = INTVAL (val) & 255;
20174 v |= v << 8;
20175 v |= v << 16;
20176 if (mode == DImode)
20177 v |= (v << 16) << 16;
20178 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20181 if (valmode == VOIDmode)
20182 valmode = QImode;
20183 if (valmode != QImode)
20184 val = gen_lowpart (QImode, val);
20185 if (mode == QImode)
20186 return val;
20187 if (!TARGET_PARTIAL_REG_STALL)
20189 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20190 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20191 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20192 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20194 rtx reg = convert_modes (mode, QImode, val, true);
20195 tmp = promote_duplicated_reg (mode, const1_rtx);
20196 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20201 rtx reg = convert_modes (mode, QImode, val, true);
20203 if (!TARGET_PARTIAL_REG_STALL)
20204 if (mode == SImode)
20205 emit_insn (gen_movsi_insv_1 (reg, reg));
20206 else
20207 emit_insn (gen_movdi_insv_1 (reg, reg));
20210 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20211 NULL, 1, OPTAB_DIRECT);
20213 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20215 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20216 NULL, 1, OPTAB_DIRECT);
20217 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20218 if (mode == SImode)
20219 return reg;
20220 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20221 NULL, 1, OPTAB_DIRECT);
20222 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
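/* Illustrative sketch (not part of GCC): the byte-broadcast computed
   above, in plain C.  Broadcasting 0xXY into 0xXYXYXYXY is a multiply
   by 0x01010101; the shift/or unwinding below is what the function
   emits on CPUs with slow multiply.  */

static unsigned int
sketch_broadcast_byte (unsigned char b)
{
  unsigned int v = b;		/* 0x000000XY */
  v |= v << 8;			/* 0x0000XYXY */
  v |= v << 16;			/* 0xXYXYXYXY */
  return v;
}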
20227 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
20228 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
20229 prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
20230 static rtx
20231 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20236 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20237 promoted_val = promote_duplicated_reg (DImode, val);
20238 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20239 promoted_val = promote_duplicated_reg (SImode, val);
20240 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20241 promoted_val = promote_duplicated_reg (HImode, val);
20243 promoted_val = val;
20245 return promoted_val;
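/* Illustrative sketch (not part of GCC): the width choice made by
   promote_duplicated_reg_to_size above, returned here as a byte count
   instead of a machine mode.  The 8-byte case applies only on 64-bit
   targets.  */

static int
sketch_promotion_width (int size_needed, int desired_align, int align)
{
  if (size_needed > 4 || (desired_align > align && desired_align > 4))
    return 8;			/* DImode */
  if (size_needed > 2 || (desired_align > align && desired_align > 2))
    return 4;			/* SImode */
  if (size_needed > 1 || (desired_align > align && desired_align > 1))
    return 2;			/* HImode */
  return 1;			/* VAL is usable as-is.  */
}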
20248 /* Expand string clear operation (bzero). Use i386 string operations when
20249 profitable. See expand_movmem comment for explanation of individual
20250 steps performed. */
20251 int
20252 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20253 rtx expected_align_exp, rtx expected_size_exp)
20258 rtx jump_around_label = NULL;
20259 HOST_WIDE_INT align = 1;
20260 unsigned HOST_WIDE_INT count = 0;
20261 HOST_WIDE_INT expected_size = -1;
20262 int size_needed = 0, epilogue_size_needed;
20263 int desired_align = 0, align_bytes = 0;
20264 enum stringop_alg alg;
20265 rtx promoted_val = NULL;
20266 bool force_loopy_epilogue = false;
20268 bool need_zero_guard = false;
20270 if (CONST_INT_P (align_exp))
20271 align = INTVAL (align_exp);
20272 /* i386 can do misaligned access at reasonably increased cost.  */
20273 if (CONST_INT_P (expected_align_exp)
20274 && INTVAL (expected_align_exp) > align)
20275 align = INTVAL (expected_align_exp);
20276 if (CONST_INT_P (count_exp))
20277 count = expected_size = INTVAL (count_exp);
20278 if (CONST_INT_P (expected_size_exp) && count == 0)
20279 expected_size = INTVAL (expected_size_exp);
20281 /* Make sure we don't need to care about overflow later on. */
20282 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20285 /* Step 0: Decide on preferred algorithm, desired alignment and
20286 size of chunks to be copied by main loop. */
20288 alg = decide_alg (count, expected_size, true, &dynamic_check);
20289 desired_align = decide_alignment (align, alg, expected_size);
20291 if (!TARGET_ALIGN_STRINGOPS)
20292 align = desired_align;
20294 if (alg == libcall)
20296 gcc_assert (alg != no_stringop);
20298 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20299 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20304 gcc_unreachable ();
20306 need_zero_guard = true;
20307 size_needed = GET_MODE_SIZE (Pmode);
20309 case unrolled_loop:
20310 need_zero_guard = true;
20311 size_needed = GET_MODE_SIZE (Pmode) * 4;
20313 case rep_prefix_8_byte:
20316 case rep_prefix_4_byte:
20319 case rep_prefix_1_byte:
20323 need_zero_guard = true;
20327 epilogue_size_needed = size_needed;
20329 /* Step 1: Prologue guard. */
20331 /* Alignment code needs count to be in a register.  */
20332 if (CONST_INT_P (count_exp) && desired_align > align)
20334 if (INTVAL (count_exp) > desired_align
20335 && INTVAL (count_exp) > size_needed)
20338 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20339 if (align_bytes <= 0)
20342 align_bytes = desired_align - align_bytes;
20344 if (align_bytes == 0)
20346 enum machine_mode mode = SImode;
20347 if (TARGET_64BIT && (count & ~0xffffffff))
20348 mode = DImode;
20349 count_exp = force_reg (mode, count_exp);
20352 /* Do the cheap promotion to allow better CSE across the
20353 main loop and epilogue (i.e. one load of the big constant in
20354 front of all code).  */
20355 if (CONST_INT_P (val_exp))
20356 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20357 desired_align, align);
20358 /* Ensure that alignment prologue won't copy past end of block. */
20359 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20361 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20362 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20363 Make sure it is power of 2. */
20364 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20366 /* To improve performance of small blocks, we jump around the VAL
20367 promoting code.  This means that if the promoted VAL is not constant,
20368 we might not use it in the epilogue and have to use a byte
20369 loop instead.  */
20370 if (epilogue_size_needed > 2 && !promoted_val)
20371 force_loopy_epilogue = true;
20374 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20376 /* If the main algorithm works on QImode, no epilogue is needed.
20377 For small sizes just don't align anything.  */
20378 if (size_needed == 1)
20379 desired_align = align;
20386 label = gen_label_rtx ();
20387 emit_cmp_and_jump_insns (count_exp,
20388 GEN_INT (epilogue_size_needed),
20389 LTU, 0, counter_mode (count_exp), 1, label);
20390 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20391 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20393 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20396 if (dynamic_check != -1)
20398 rtx hot_label = gen_label_rtx ();
20399 jump_around_label = gen_label_rtx ();
20400 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20401 LEU, 0, counter_mode (count_exp), 1, hot_label);
20402 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20403 set_storage_via_libcall (dst, count_exp, val_exp, false);
20404 emit_jump (jump_around_label);
20405 emit_label (hot_label);
20408 /* Step 2: Alignment prologue. */
20410 /* Do the expensive promotion once we have branched off the small blocks.  */
20412 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20413 desired_align, align);
20414 gcc_assert (desired_align >= 1 && align >= 1);
20416 if (desired_align > align)
20418 if (align_bytes == 0)
20420 /* Except for the first move in epilogue, we no longer know
20421 constant offset in aliasing info.  It doesn't seem worth
20422 the pain to maintain it for the first move, so throw away
20423 the info early.  */
20424 dst = change_address (dst, BLKmode, destreg);
20425 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20430 /* If we know how many bytes need to be stored before dst is
20431 sufficiently aligned, maintain aliasing info accurately. */
20432 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20433 desired_align, align_bytes);
20434 count_exp = plus_constant (count_exp, -align_bytes);
20435 count -= align_bytes;
20437 if (need_zero_guard
20438 && (count < (unsigned HOST_WIDE_INT) size_needed
20439 || (align_bytes == 0
20440 && count < ((unsigned HOST_WIDE_INT) size_needed
20441 + desired_align - align))))
20443 /* It is possible that we copied enough so the main loop will not
20444 execute.  */
20445 gcc_assert (size_needed > 1);
20446 if (label == NULL_RTX)
20447 label = gen_label_rtx ();
20448 emit_cmp_and_jump_insns (count_exp,
20449 GEN_INT (size_needed),
20450 LTU, 0, counter_mode (count_exp), 1, label);
20451 if (expected_size == -1
20452 || expected_size < (desired_align - align) / 2 + size_needed)
20453 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20455 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20458 if (label && size_needed == 1)
20460 emit_label (label);
20461 LABEL_NUSES (label) = 1;
20463 promoted_val = val_exp;
20464 epilogue_size_needed = 1;
20466 else if (label == NULL_RTX)
20467 epilogue_size_needed = size_needed;
20469 /* Step 3: Main loop. */
20475 gcc_unreachable ();
20477 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20478 count_exp, QImode, 1, expected_size);
20481 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20482 count_exp, Pmode, 1, expected_size);
20484 case unrolled_loop:
20485 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20486 count_exp, Pmode, 4, expected_size);
20488 case rep_prefix_8_byte:
20489 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20492 case rep_prefix_4_byte:
20493 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20496 case rep_prefix_1_byte:
20497 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20501 /* Properly adjust the offset of the dest memory for aliasing.  */
20502 if (CONST_INT_P (count_exp))
20503 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20504 (count / size_needed) * size_needed);
20506 dst = change_address (dst, BLKmode, destreg);
20508 /* Step 4: Epilogue to copy the remaining bytes. */
20512 /* When the main loop is done, COUNT_EXP might hold the original count,
20513 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20514 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20515 bytes.  Compensate if needed.  */
20517 if (size_needed < epilogue_size_needed)
20520 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20521 GEN_INT (size_needed - 1), count_exp, 1,
20523 if (tmp != count_exp)
20524 emit_move_insn (count_exp, tmp);
20526 emit_label (label);
20527 LABEL_NUSES (label) = 1;
20530 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20532 if (force_loopy_epilogue)
20533 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20534 epilogue_size_needed);
20536 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20537 epilogue_size_needed);
20539 if (jump_around_label)
20540 emit_label (jump_around_label);
20544 /* Expand the appropriate insns for doing strlen if not just doing
20545 repnz; scasb
20547 out = result, initialized with the start address
20548 align_rtx = alignment of the address.
20549 scratch = scratch register, initialized with the start address when
20550 not aligned, otherwise undefined
20552 This is just the body. It needs the initializations mentioned above and
20553 some address computing at the end. These things are done in i386.md. */
20555 static void
20556 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20560 rtx align_2_label = NULL_RTX;
20561 rtx align_3_label = NULL_RTX;
20562 rtx align_4_label = gen_label_rtx ();
20563 rtx end_0_label = gen_label_rtx ();
20565 rtx tmpreg = gen_reg_rtx (SImode);
20566 rtx scratch = gen_reg_rtx (SImode);
20570 if (CONST_INT_P (align_rtx))
20571 align = INTVAL (align_rtx);
20573 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20575 /* Is there a known alignment and is it less than 4? */
20578 rtx scratch1 = gen_reg_rtx (Pmode);
20579 emit_move_insn (scratch1, out);
20580 /* Is there a known alignment and is it not 2? */
20583 align_3_label = gen_label_rtx (); /* Label when aligned to a 3-byte boundary.  */
20584 align_2_label = gen_label_rtx (); /* Label when aligned to a 2-byte boundary.  */
20586 /* Leave just the 3 lower bits. */
20587 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20588 NULL_RTX, 0, OPTAB_WIDEN);
20590 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20591 Pmode, 1, align_4_label);
20592 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20593 Pmode, 1, align_2_label);
20594 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20595 Pmode, 1, align_3_label);
20599 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20600 check whether it is aligned to a 4-byte boundary.  */
20602 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20603 NULL_RTX, 0, OPTAB_WIDEN);
20605 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20606 Pmode, 1, align_4_label);
20609 mem = change_address (src, QImode, out);
20611 /* Now compare the bytes. */
20613 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
20614 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20615 QImode, 1, end_0_label);
20617 /* Increment the address. */
20618 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20620 /* Not needed with an alignment of 2 */
20623 emit_label (align_2_label);
20625 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20628 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20630 emit_label (align_3_label);
20633 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20636 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20639 /* Generate loop to check 4 bytes at a time.  It is not a good idea to
20640 align this loop.  Doing so only enlarges the program and does not help
20641 speed it up.  */
20642 emit_label (align_4_label);
20644 mem = change_address (src, SImode, out);
20645 emit_move_insn (scratch, mem);
20646 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20648 /* This formula yields a nonzero result iff one of the bytes is zero.
20649 This saves three branches inside the loop and many cycles; see the
20650 plain-C sketch of the test after this function.  */
20651 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20652 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20653 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20654 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20655 gen_int_mode (0x80808080, SImode)));
20656 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20661 rtx reg = gen_reg_rtx (SImode);
20662 rtx reg2 = gen_reg_rtx (Pmode);
20663 emit_move_insn (reg, tmpreg);
20664 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20666 /* If zero is not in the first two bytes, move two bytes forward. */
20667 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20668 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20669 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20670 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20671 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20674 /* Emit lea manually to avoid clobbering of flags. */
20675 emit_insn (gen_rtx_SET (SImode, reg2,
20676 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20678 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20679 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20680 emit_insn (gen_rtx_SET (VOIDmode, out,
20681 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20687 rtx end_2_label = gen_label_rtx ();
20688 /* Is zero in the first two bytes? */
20690 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20691 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20692 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20693 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20694 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20696 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20697 JUMP_LABEL (tmp) = end_2_label;
20699 /* Not in the first two. Move two bytes forward. */
20700 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20701 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20703 emit_label (end_2_label);
20707 /* Avoid branch in fixing the byte. */
20708 tmpreg = gen_lowpart (QImode, tmpreg);
20709 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20710 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20711 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20712 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20714 emit_label (end_0_label);
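/* Illustrative sketch (not part of GCC): the branch-free zero-byte test
   the loop above emits.  (V - 0x01010101) & ~V & 0x80808080 is nonzero
   iff at least one byte of V is zero, which is exactly the add/not/and
   sequence generated before the conditional jump.  */

static int
sketch_has_zero_byte (unsigned int v)
{
  return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
}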
20717 /* Expand strlen. */
20719 int
20720 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20722 rtx addr, scratch1, scratch2, scratch3, scratch4;
20724 /* The generic case of the strlen expander is long.  Avoid expanding it
20725 unless TARGET_INLINE_ALL_STRINGOPS.  */
20727 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20728 && !TARGET_INLINE_ALL_STRINGOPS
20729 && !optimize_insn_for_size_p ()
20730 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20733 addr = force_reg (Pmode, XEXP (src, 0));
20734 scratch1 = gen_reg_rtx (Pmode);
20736 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20737 && !optimize_insn_for_size_p ())
20739 /* Well it seems that some optimizer does not combine a call like
20740 foo(strlen(bar), strlen(bar));
20741 when the move and the subtraction are done here.  It does calculate
20742 the length just once when these instructions are done inside of
20743 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20744 often used and I use one fewer register for the lifetime of
20745 output_strlen_unroll() this is better. */
20747 emit_move_insn (out, addr);
20749 ix86_expand_strlensi_unroll_1 (out, src, align);
20751 /* strlensi_unroll_1 returns the address of the zero at the end of
20752 the string, like memchr(), so compute the length by subtracting
20753 the start address. */
20754 emit_insn (ix86_gen_sub3 (out, out, addr));
20760 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20761 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20764 scratch2 = gen_reg_rtx (Pmode);
20765 scratch3 = gen_reg_rtx (Pmode);
20766 scratch4 = force_reg (Pmode, constm1_rtx);
20768 emit_move_insn (scratch3, addr);
20769 eoschar = force_reg (QImode, eoschar);
20771 src = replace_equiv_address_nv (src, scratch3);
20773 /* If .md starts supporting :P, this can be done in .md. */
20774 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20775 scratch4), UNSPEC_SCAS);
20776 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20777 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20778 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
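/* Illustrative sketch (not part of GCC): the length fixup emitted just
   above.  `repnz scasb' starts with the count register at -1 and
   decrements it once per byte scanned (including the terminator), so
   the length is recovered as ~counter - 1.  */

static unsigned long
sketch_scas_strlen_fixup (unsigned long counter_after_scas)
{
  return ~counter_after_scas - 1;
}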
20783 /* For a given symbol (function), construct code to compute the address of
20784 its PLT entry in the large x86-64 PIC model.  */
20785 static rtx
20786 construct_plt_address (rtx symbol)
20788 rtx tmp = gen_reg_rtx (Pmode);
20789 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20791 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20792 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20794 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20795 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20800 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20802 rtx pop, int sibcall)
20804 rtx use = NULL, call;
20806 if (pop == const0_rtx)
20807 pop = NULL;
20808 gcc_assert (!TARGET_64BIT || !pop);
20810 if (TARGET_MACHO && !TARGET_64BIT)
20813 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20814 fnaddr = machopic_indirect_call_target (fnaddr);
20819 /* Static functions and indirect calls don't need the pic register. */
20820 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20821 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20822 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20823 use_reg (&use, pic_offset_table_rtx);
20826 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20828 rtx al = gen_rtx_REG (QImode, AX_REG);
20829 emit_move_insn (al, callarg2);
20830 use_reg (&use, al);
20833 if (ix86_cmodel == CM_LARGE_PIC
20835 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20836 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20837 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20839 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20840 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20842 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20843 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20846 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20848 call = gen_rtx_SET (VOIDmode, retval, call);
20851 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20852 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20853 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20856 && ix86_cfun_abi () == MS_ABI
20857 && (!callarg2 || INTVAL (callarg2) != -2))
20859 /* We need to represent that SI and DI registers are clobbered
20860 by the call.  */
20861 static int clobbered_registers[] = {
20862 XMM6_REG, XMM7_REG, XMM8_REG,
20863 XMM9_REG, XMM10_REG, XMM11_REG,
20864 XMM12_REG, XMM13_REG, XMM14_REG,
20865 XMM15_REG, SI_REG, DI_REG
20868 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20869 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20870 UNSPEC_MS_TO_SYSV_CALL);
20874 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20875 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20878 (SSE_REGNO_P (clobbered_registers[i])
20880 clobbered_registers[i]));
20882 call = gen_rtx_PARALLEL (VOIDmode,
20883 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20887 call = emit_call_insn (call);
20889 CALL_INSN_FUNCTION_USAGE (call) = use;
20895 /* Clear stack slot assignments remembered from previous functions.
20896 This is called from INIT_EXPANDERS once before RTL is emitted for each
20897 function.  */
20899 static struct machine_function *
20900 ix86_init_machine_status (void)
20902 struct machine_function *f;
20904 f = ggc_alloc_cleared_machine_function ();
20905 f->use_fast_prologue_epilogue_nregs = -1;
20906 f->tls_descriptor_call_expanded_p = 0;
20907 f->call_abi = ix86_abi;
20912 /* Return a MEM corresponding to a stack slot with mode MODE.
20913 Allocate a new slot if necessary.
20915 The RTL for a function can have several slots available: N is
20916 which slot to use. */
20918 rtx
20919 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20921 struct stack_local_entry *s;
20923 gcc_assert (n < MAX_386_STACK_LOCALS);
20925 /* Virtual slot is valid only before vregs are instantiated. */
20926 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20928 for (s = ix86_stack_locals; s; s = s->next)
20929 if (s->mode == mode && s->n == n)
20930 return copy_rtx (s->rtl);
20932 s = ggc_alloc_stack_local_entry ();
20935 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20937 s->next = ix86_stack_locals;
20938 ix86_stack_locals = s;
20942 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20944 static GTY(()) rtx ix86_tls_symbol;
20945 static rtx
20946 ix86_tls_get_addr (void)
20949 if (!ix86_tls_symbol)
20951 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20952 (TARGET_ANY_GNU_TLS
20954 ? "___tls_get_addr"
20955 : "__tls_get_addr");
20958 return ix86_tls_symbol;
20961 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20963 static GTY(()) rtx ix86_tls_module_base_symbol;
20964 static rtx
20965 ix86_tls_module_base (void)
20968 if (!ix86_tls_module_base_symbol)
20970 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20971 "_TLS_MODULE_BASE_");
20972 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20973 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20976 return ix86_tls_module_base_symbol;
20979 /* Calculate the length of the memory address in the instruction
20980 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20982 int
20983 memory_address_length (rtx addr)
20985 struct ix86_address parts;
20986 rtx base, index, disp;
20990 if (GET_CODE (addr) == PRE_DEC
20991 || GET_CODE (addr) == POST_INC
20992 || GET_CODE (addr) == PRE_MODIFY
20993 || GET_CODE (addr) == POST_MODIFY)
20996 ok = ix86_decompose_address (addr, &parts);
20999 if (parts.base && GET_CODE (parts.base) == SUBREG)
21000 parts.base = SUBREG_REG (parts.base);
21001 if (parts.index && GET_CODE (parts.index) == SUBREG)
21002 parts.index = SUBREG_REG (parts.index);
21005 index = parts.index;
21006 disp = parts.disp;
21009 /* Rule of thumb:
21010 - esp as the base always wants an index,
21011 - ebp as the base always wants a displacement,
21012 - r12 as the base always wants an index,
21013 - r13 as the base always wants a displacement. */
21015 /* Register Indirect. */
21016 if (base && !index && !disp)
21018 /* esp (for its index) and ebp (for its displacement) need
21019 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
21020 mode.  */
21021 if (REG_P (addr)
21022 && (addr == arg_pointer_rtx
21023 || addr == frame_pointer_rtx
21024 || REGNO (addr) == SP_REG
21025 || REGNO (addr) == BP_REG
21026 || REGNO (addr) == R12_REG
21027 || REGNO (addr) == R13_REG))
21031 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21032 is not disp32, but disp32(%rip), so for disp32
21033 SIB byte is needed, unless print_operand_address
21034 optimizes it into disp32(%rip) or (%rip) is implied
21035 by UNSPEC.  */
21036 else if (disp && !base && !index)
21043 if (GET_CODE (disp) == CONST)
21044 symbol = XEXP (disp, 0);
21045 if (GET_CODE (symbol) == PLUS
21046 && CONST_INT_P (XEXP (symbol, 1)))
21047 symbol = XEXP (symbol, 0);
21049 if (GET_CODE (symbol) != LABEL_REF
21050 && (GET_CODE (symbol) != SYMBOL_REF
21051 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21052 && (GET_CODE (symbol) != UNSPEC
21053 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21054 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21061 /* Find the length of the displacement constant. */
21064 if (base && satisfies_constraint_K (disp))
21069 /* ebp always wants a displacement. Similarly r13. */
21070 else if (base && REG_P (base)
21071 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21074 /* An index requires the two-byte modrm form.... */
21076 /* ...like esp (or r12), which always wants an index. */
21077 || base == arg_pointer_rtx
21078 || base == frame_pointer_rtx
21079 || (base && REG_P (base)
21080 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
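/* Illustrative sketch (not part of GCC): the extra-byte accounting the
   function above performs for common 32-bit addressing forms.  Returns
   the bytes needed beyond the opcode and modrm byte: +1 for a SIB
   byte, +1 or +4 for a displacement.  Hypothetical helper, ignoring
   the RIP-relative and UNSPEC cases.  */

static int
sketch_address_length (int has_base, int base_needs_index,
		       int base_needs_disp, int has_index, long disp)
{
  int len = 0;
  if (has_index || base_needs_index)	/* esp/r12-like bases.  */
    len += 1;				/* SIB byte.  */
  if (!has_base)
    len += 4;				/* Absolute disp32.  */
  else if (disp != 0 || base_needs_disp)	/* ebp/r13-like bases.  */
    len += (disp >= -128 && disp <= 127) ? 1 : 4;	/* disp8 or disp32.  */
  return len;
}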
21097 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
21098 is set, expect that the insn has an 8-bit immediate alternative.  */
21099 int
21100 ix86_attr_length_immediate_default (rtx insn, int shortform)
21104 extract_insn_cached (insn);
21105 for (i = recog_data.n_operands - 1; i >= 0; --i)
21106 if (CONSTANT_P (recog_data.operand[i]))
21108 enum attr_mode mode = get_attr_mode (insn);
21111 if (shortform && CONST_INT_P (recog_data.operand[i]))
21113 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21120 ival = trunc_int_for_mode (ival, HImode);
21123 ival = trunc_int_for_mode (ival, SImode);
21128 if (IN_RANGE (ival, -128, 127))
21145 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
21150 fatal_insn ("unknown insn mode", insn);
21155 /* Compute default value for "length_address" attribute. */
21156 int
21157 ix86_attr_length_address_default (rtx insn)
21161 if (get_attr_type (insn) == TYPE_LEA)
21163 rtx set = PATTERN (insn), addr;
21165 if (GET_CODE (set) == PARALLEL)
21166 set = XVECEXP (set, 0, 0);
21168 gcc_assert (GET_CODE (set) == SET);
21170 addr = SET_SRC (set);
21171 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21173 if (GET_CODE (addr) == ZERO_EXTEND)
21174 addr = XEXP (addr, 0);
21175 if (GET_CODE (addr) == SUBREG)
21176 addr = SUBREG_REG (addr);
21179 return memory_address_length (addr);
21182 extract_insn_cached (insn);
21183 for (i = recog_data.n_operands - 1; i >= 0; --i)
21184 if (MEM_P (recog_data.operand[i]))
21186 constrain_operands_cached (reload_completed);
21187 if (which_alternative != -1)
21189 const char *constraints = recog_data.constraints[i];
21190 int alt = which_alternative;
21192 while (*constraints == '=' || *constraints == '+')
21195 while (*constraints++ != ',')
21197 /* Skip ignored operands. */
21198 if (*constraints == 'X')
21201 return memory_address_length (XEXP (recog_data.operand[i], 0));
21206 /* Compute default value for "length_vex" attribute.  It includes
21207 a 2- or 3-byte VEX prefix and 1 opcode byte.  */
21209 int
21210 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21211 int has_vex_w)
21215 /* Only insns with a 0f opcode can use the 2-byte VEX prefix, and the VEX.W
21216 bit requires the 3-byte VEX prefix.  */
21217 if (!has_0f_opcode || has_vex_w)
21220 /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
21224 extract_insn_cached (insn);
21226 for (i = recog_data.n_operands - 1; i >= 0; --i)
21227 if (REG_P (recog_data.operand[i]))
21229 /* REX.W bit uses 3 byte VEX prefix. */
21230 if (GET_MODE (recog_data.operand[i]) == DImode
21231 && GENERAL_REG_P (recog_data.operand[i]))
21236 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21237 if (MEM_P (recog_data.operand[i])
21238 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
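/* Illustrative sketch (not part of GCC): the prefix-size rule encoded
   above.  The 2-byte VEX prefix is only available for 0f-map opcodes
   and cannot encode the W, X or B bits, so any of those forces the
   3-byte form.  The returned value includes the one opcode byte.  */

static int
sketch_vex_length (int has_0f_opcode, int has_vex_w, int uses_rex_xb)
{
  if (!has_0f_opcode || has_vex_w || uses_rex_xb)
    return 3 + 1;		/* 3-byte VEX prefix + opcode byte.  */
  return 2 + 1;			/* 2-byte VEX prefix + opcode byte.  */
}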
21245 /* Return the maximum number of instructions a cpu can issue. */
21247 static int
21248 ix86_issue_rate (void)
21252 case PROCESSOR_PENTIUM:
21253 case PROCESSOR_ATOM:
21257 case PROCESSOR_PENTIUMPRO:
21258 case PROCESSOR_PENTIUM4:
21259 case PROCESSOR_ATHLON:
21261 case PROCESSOR_AMDFAM10:
21262 case PROCESSOR_NOCONA:
21263 case PROCESSOR_GENERIC32:
21264 case PROCESSOR_GENERIC64:
21265 case PROCESSOR_BDVER1:
21268 case PROCESSOR_CORE2:
21276 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21277 by DEP_INSN and nothing else set by DEP_INSN.  */
21280 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21284 /* Simplify the test for uninteresting insns. */
21285 if (insn_type != TYPE_SETCC
21286 && insn_type != TYPE_ICMOV
21287 && insn_type != TYPE_FCMOV
21288 && insn_type != TYPE_IBR)
21291 if ((set = single_set (dep_insn)) != 0)
21293 set = SET_DEST (set);
21296 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21297 && XVECLEN (PATTERN (dep_insn), 0) == 2
21298 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21299 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21301 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21302 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21307 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21310 /* This test is true if the dependent insn reads the flags but
21311 not any other potentially set register. */
21312 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21315 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21321 /* Return true iff USE_INSN has a memory address with operands set by
21322 SET_INSN.  */
21325 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21328 extract_insn_cached (use_insn);
21329 for (i = recog_data.n_operands - 1; i >= 0; --i)
21330 if (MEM_P (recog_data.operand[i]))
21332 rtx addr = XEXP (recog_data.operand[i], 0);
21333 return modified_in_p (addr, set_insn) != 0;
21338 static int
21339 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21341 enum attr_type insn_type, dep_insn_type;
21342 enum attr_memory memory;
21344 int dep_insn_code_number;
21346 /* Anti and output dependencies have zero cost on all CPUs. */
21347 if (REG_NOTE_KIND (link) != 0)
21350 dep_insn_code_number = recog_memoized (dep_insn);
21352 /* If we can't recognize the insns, we can't really do anything. */
21353 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21356 insn_type = get_attr_type (insn);
21357 dep_insn_type = get_attr_type (dep_insn);
21361 case PROCESSOR_PENTIUM:
21362 /* Address Generation Interlock adds a cycle of latency. */
21363 if (insn_type == TYPE_LEA)
21365 rtx addr = PATTERN (insn);
21367 if (GET_CODE (addr) == PARALLEL)
21368 addr = XVECEXP (addr, 0, 0);
21370 gcc_assert (GET_CODE (addr) == SET);
21372 addr = SET_SRC (addr);
21373 if (modified_in_p (addr, dep_insn))
21376 else if (ix86_agi_dependent (dep_insn, insn))
21379 /* ??? Compares pair with jump/setcc. */
21380 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21383 /* Floating point stores require value to be ready one cycle earlier. */
21384 if (insn_type == TYPE_FMOV
21385 && get_attr_memory (insn) == MEMORY_STORE
21386 && !ix86_agi_dependent (dep_insn, insn))
21390 case PROCESSOR_PENTIUMPRO:
21391 memory = get_attr_memory (insn);
21393 /* INT->FP conversion is expensive. */
21394 if (get_attr_fp_int_src (dep_insn))
21397 /* There is one cycle extra latency between an FP op and a store. */
21398 if (insn_type == TYPE_FMOV
21399 && (set = single_set (dep_insn)) != NULL_RTX
21400 && (set2 = single_set (insn)) != NULL_RTX
21401 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21402 && MEM_P (SET_DEST (set2)))
21405 /* Show the ability of the reorder buffer to hide the latency of a load by
21406 executing it in parallel with a previous instruction when the previous
21407 instruction is not needed to compute the address.  */
21408 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21409 && !ix86_agi_dependent (dep_insn, insn))
21411 /* Claim moves to take one cycle, as the core can issue one load
21412 at a time and the next load can start a cycle later.  */
21413 if (dep_insn_type == TYPE_IMOV
21414 || dep_insn_type == TYPE_FMOV)
21422 memory = get_attr_memory (insn);
21424 /* The esp dependency is resolved before the instruction is really
21425 finished.  */
21427 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21430 /* INT->FP conversion is expensive. */
21431 if (get_attr_fp_int_src (dep_insn))
21434 /* Show the ability of the reorder buffer to hide the latency of a load by
21435 executing it in parallel with a previous instruction when the previous
21436 instruction is not needed to compute the address.  */
21437 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21438 && !ix86_agi_dependent (dep_insn, insn))
21440 /* Claim moves to take one cycle, as the core can issue one load
21441 at a time and the next load can start a cycle later.  */
21442 if (dep_insn_type == TYPE_IMOV
21443 || dep_insn_type == TYPE_FMOV)
21452 case PROCESSOR_ATHLON:
21454 case PROCESSOR_AMDFAM10:
21455 case PROCESSOR_BDVER1:
21456 case PROCESSOR_ATOM:
21457 case PROCESSOR_GENERIC32:
21458 case PROCESSOR_GENERIC64:
21459 memory = get_attr_memory (insn);
21461 /* Show the ability of the reorder buffer to hide the latency of a load by
21462 executing it in parallel with a previous instruction when the previous
21463 instruction is not needed to compute the address.  */
21464 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21465 && !ix86_agi_dependent (dep_insn, insn))
21467 enum attr_unit unit = get_attr_unit (insn);
21470 /* Because of the difference between the lengths of the integer and
21471 floating unit pipeline preparation stages, the memory operands
21472 for floating point are cheaper.
21474 ??? For Athlon the difference is most probably 2.  */
21475 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21478 loadcost = TARGET_ATHLON ? 2 : 0;
21480 if (cost >= loadcost)
21493 /* How many alternative schedules to try.  This should be as wide as the
21494 scheduling freedom in the DFA, but no wider.  Making this value too
21495 large results in extra work for the scheduler.  */
21497 static int
21498 ia32_multipass_dfa_lookahead (void)
21502 case PROCESSOR_PENTIUM:
21505 case PROCESSOR_PENTIUMPRO:
21515 /* Compute the alignment given to a constant that is being placed in memory.
21516 EXP is the constant and ALIGN is the alignment that the object would
21517 ordinarily have.
21518 The value of this function is used instead of that alignment to align
21519 the object.  */
21521 int
21522 ix86_constant_alignment (tree exp, int align)
21524 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21525 || TREE_CODE (exp) == INTEGER_CST)
21527 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21529 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21532 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21533 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21534 return BITS_PER_WORD;
21539 /* Compute the alignment for a static variable.
21540 TYPE is the data type, and ALIGN is the alignment that
21541 the object would ordinarily have. The value of this function is used
21542 instead of that alignment to align the object. */
21544 int
21545 ix86_data_alignment (tree type, int align)
21547 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21549 if (AGGREGATE_TYPE_P (type)
21550 && TYPE_SIZE (type)
21551 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21552 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21553 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21554 && align < max_align)
21557 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
21558 to a 16-byte boundary.  */
21561 if (AGGREGATE_TYPE_P (type)
21562 && TYPE_SIZE (type)
21563 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21564 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21565 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21569 if (TREE_CODE (type) == ARRAY_TYPE)
21571 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21573 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21576 else if (TREE_CODE (type) == COMPLEX_TYPE)
21579 if (TYPE_MODE (type) == DCmode && align < 64)
21581 if ((TYPE_MODE (type) == XCmode
21582 || TYPE_MODE (type) == TCmode) && align < 128)
21585 else if ((TREE_CODE (type) == RECORD_TYPE
21586 || TREE_CODE (type) == UNION_TYPE
21587 || TREE_CODE (type) == QUAL_UNION_TYPE)
21588 && TYPE_FIELDS (type))
21590 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21592 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21595 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21596 || TREE_CODE (type) == INTEGER_TYPE)
21598 if (TYPE_MODE (type) == DFmode && align < 64)
21600 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
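/* Illustrative sketch (not part of GCC): the x86-64 ABI array rule the
   function above applies -- aggregates of 16 bytes or more get at
   least 16-byte (128-bit) alignment so aligned SSE accesses are
   possible.  */

static int
sketch_x86_64_array_alignment (unsigned long size_in_bytes, int align_in_bits)
{
  if (size_in_bytes >= 16 && align_in_bits < 128)
    return 128;
  return align_in_bits;
}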
21607 /* Compute the alignment for a local variable or a stack slot. EXP is
21608 the data type or decl itself, MODE is the widest mode available and
21609 ALIGN is the alignment that the object would ordinarily have. The
21610 value of this macro is used instead of that alignment to align the
21611 object.  */
21613 unsigned int
21614 ix86_local_alignment (tree exp, enum machine_mode mode,
21615 unsigned int align)
21619 if (exp && DECL_P (exp))
21621 type = TREE_TYPE (exp);
21630 /* Don't do dynamic stack realignment for long long objects with
21631 -mpreferred-stack-boundary=2. */
21634 && ix86_preferred_stack_boundary < 64
21635 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21636 && (!type || !TYPE_USER_ALIGN (type))
21637 && (!decl || !DECL_USER_ALIGN (decl)))
21640 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21641 register in MODE.  We will return the largest alignment of XF
21642 and DF.  */
21645 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21646 align = GET_MODE_ALIGNMENT (DFmode);
21650 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
21651 to a 16-byte boundary.  Exact wording is:
21653 An array uses the same alignment as its elements, except that a local or
21654 global array variable of length at least 16 bytes or
21655 a C99 variable-length array variable always has alignment of at least 16 bytes.
21657 This was added to allow use of aligned SSE instructions on arrays.  This
21658 rule is meant for static storage (where the compiler cannot do the analysis
21659 by itself).  We follow it for automatic variables only when convenient.
21660 We fully control everything in the function being compiled, and functions
21661 from other units cannot rely on the alignment.
21663 Exclude the va_list type.  It is the common case of a local array where
21664 we cannot benefit from the alignment.  */
21665 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21668 if (AGGREGATE_TYPE_P (type)
21669 && (TYPE_MAIN_VARIANT (type)
21670 != TYPE_MAIN_VARIANT (va_list_type_node))
21671 && TYPE_SIZE (type)
21672 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21673 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21674 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21677 if (TREE_CODE (type) == ARRAY_TYPE)
21679 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21681 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21684 else if (TREE_CODE (type) == COMPLEX_TYPE)
21686 if (TYPE_MODE (type) == DCmode && align < 64)
21688 if ((TYPE_MODE (type) == XCmode
21689 || TYPE_MODE (type) == TCmode) && align < 128)
21692 else if ((TREE_CODE (type) == RECORD_TYPE
21693 || TREE_CODE (type) == UNION_TYPE
21694 || TREE_CODE (type) == QUAL_UNION_TYPE)
21695 && TYPE_FIELDS (type))
21697 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21699 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21702 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21703 || TREE_CODE (type) == INTEGER_TYPE)
21706 if (TYPE_MODE (type) == DFmode && align < 64)
21708 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21714 /* Compute the minimum required alignment for dynamic stack realignment
21715 purposes for a local variable, parameter or a stack slot. EXP is
21716 the data type or decl itself, MODE is its mode and ALIGN is the
21717 alignment that the object would ordinarily have. */
21719 unsigned int
21720 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21721 unsigned int align)
21725 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21728 if (exp && DECL_P (exp))
21730 type = TREE_TYPE (exp);
21739 /* Don't do dynamic stack realignment for long long objects with
21740 -mpreferred-stack-boundary=2. */
21741 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21742 && (!type || !TYPE_USER_ALIGN (type))
21743 && (!decl || !DECL_USER_ALIGN (decl)))
21749 /* Find a location for the static chain incoming to a nested function.
21750 This is a register, unless all free registers are used by arguments. */
21752 static rtx
21753 ix86_static_chain (const_tree fndecl, bool incoming_p)
21757 if (!DECL_STATIC_CHAIN (fndecl))
21762 /* We always use R10 in 64-bit mode. */
21768 /* By default in 32-bit mode we use ECX to pass the static chain. */
21771 fntype = TREE_TYPE (fndecl);
21772 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21774 /* Fastcall functions use ecx/edx for arguments, which leaves
21775 us with EAX for the static chain. */
21778 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21780 /* Thiscall functions use ecx for arguments, which leaves
21781 us with EAX for the static chain. */
21784 else if (ix86_function_regparm (fntype, fndecl) == 3)
21786 /* For regparm 3, we have no free call-clobbered registers in
21787 which to store the static chain. In order to implement this,
21788 we have the trampoline push the static chain to the stack.
21789 However, we can't push a value below the return address when
21790 we call the nested function directly, so we have to use an
21791 alternate entry point. For this we use ESI, and have the
21792 alternate entry point push ESI, so that things appear the
21793 same once we're executing the nested function. */
21796 if (fndecl == current_function_decl)
21797 ix86_static_chain_on_stack = true;
21798 return gen_frame_mem (SImode,
21799 plus_constant (arg_pointer_rtx, -8));
21805 return gen_rtx_REG (Pmode, regno);
21808 /* Emit RTL insns to initialize the variable parts of a trampoline.
21809 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21810 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21811 to be passed to the target function. */
21813 static void
21814 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21818 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21825 /* Depending on the static chain location, either load a register
21826 with a constant, or push the constant to the stack. All of the
21827 instructions are the same size. */
21828 chain = ix86_static_chain (fndecl, true);
21831 if (REGNO (chain) == CX_REG)
21832 opcode = 0xb9;
21833 else if (REGNO (chain) == AX_REG)
21834 opcode = 0xb8;
21835 else
21836 gcc_unreachable ();
21841 mem = adjust_address (m_tramp, QImode, 0);
21842 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21844 mem = adjust_address (m_tramp, SImode, 1);
21845 emit_move_insn (mem, chain_value);
21847 /* Compute offset from the end of the jmp to the target function.
21848 In the case in which the trampoline stores the static chain on
21849 the stack, we need to skip the first insn which pushes the
21850 (call-saved) register static chain; this push is 1 byte. */
21851 disp = expand_binop (SImode, sub_optab, fnaddr,
21852 plus_constant (XEXP (m_tramp, 0),
21853 MEM_P (chain) ? 9 : 10),
21854 NULL_RTX, 1, OPTAB_DIRECT);
21856 mem = adjust_address (m_tramp, QImode, 5);
21857 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21859 mem = adjust_address (m_tramp, SImode, 6);
21860 emit_move_insn (mem, disp);
21866 /* Load the function address into r11.  Try to load the address using
21867 the shorter movl instead of movabs.  We may want to support
21868 movq for kernel mode, but the kernel does not use trampolines at
21869 the moment.  */
21870 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21872 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21874 mem = adjust_address (m_tramp, HImode, offset);
21875 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21877 mem = adjust_address (m_tramp, SImode, offset + 2);
21878 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21883 mem = adjust_address (m_tramp, HImode, offset);
21884 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21886 mem = adjust_address (m_tramp, DImode, offset + 2);
21887 emit_move_insn (mem, fnaddr);
21891 /* Load static chain using movabs to r10. */
21892 mem = adjust_address (m_tramp, HImode, offset);
21893 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21895 mem = adjust_address (m_tramp, DImode, offset + 2);
21896 emit_move_insn (mem, chain_value);
21899 /* Jump to r11; the last (unused) byte is a nop, only there to
21900 pad the write out to a single 32-bit store. */
21901 mem = adjust_address (m_tramp, SImode, offset);
21902 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21905 gcc_assert (offset <= TRAMPOLINE_SIZE);
21908 #ifdef ENABLE_EXECUTE_STACK
21909 #ifdef CHECK_EXECUTE_STACK_ENABLED
21910 if (CHECK_EXECUTE_STACK_ENABLED)
21912 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21913 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
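/* Illustrative sketch (not part of GCC): the 10-byte 32-bit trampoline
   laid down above when the static chain lives in a register, shown as
   a packed struct.  The adjust_address offsets 0, 1, 5 and 6 used in
   the code match these fields; the jmp displacement is relative to the
   end of the trampoline (hence the `- 10' in the disp computation).  */

struct sketch_tramp32
{
  unsigned char mov_opcode;	/* 0xb9 (movl $imm32, %ecx) or 0xb8 (%eax).  */
  unsigned int chain_value;	/* The static chain, stored at offset 1.  */
  unsigned char jmp_opcode;	/* 0xe9: jmp rel32, stored at offset 5.  */
  unsigned int jmp_disp;	/* target - (trampoline + 10), offset 6.  */
} __attribute__ ((packed));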
21917 /* The following file contains several enumerations and data structures
21918 built from the definitions in i386-builtin-types.def. */
21920 #include "i386-builtin-types.inc"
21922 /* Table for the ix86 builtin non-function types. */
21923 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21925 /* Retrieve an element from the above table, building some of
21926 the types lazily. */
21928 static tree
21929 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21931 unsigned int index;
21934 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21936 type = ix86_builtin_type_tab[(int) tcode];
21940 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21941 if (tcode <= IX86_BT_LAST_VECT)
21943 enum machine_mode mode;
21945 index = tcode - IX86_BT_LAST_PRIM - 1;
21946 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21947 mode = ix86_builtin_type_vect_mode[index];
21949 type = build_vector_type_for_mode (itype, mode);
21955 index = tcode - IX86_BT_LAST_VECT - 1;
21956 if (tcode <= IX86_BT_LAST_PTR)
21957 quals = TYPE_UNQUALIFIED;
21959 quals = TYPE_QUAL_CONST;
21961 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21962 if (quals != TYPE_UNQUALIFIED)
21963 itype = build_qualified_type (itype, quals);
21965 type = build_pointer_type (itype);
21968 ix86_builtin_type_tab[(int) tcode] = type;
21972 /* Table for the ix86 builtin function types. */
21973 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21975 /* Retrieve an element from the above table, building some of
21976 the types lazily. */
21978 static tree
21979 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21983 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21985 type = ix86_builtin_func_type_tab[(int) tcode];
21989 if (tcode <= IX86_BT_LAST_FUNC)
21991 unsigned start = ix86_builtin_func_start[(int) tcode];
21992 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21993 tree rtype, atype, args = void_list_node;
21996 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21997 for (i = after - 1; i > start; --i)
21999 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
22000 args = tree_cons (NULL, atype, args);
22003 type = build_function_type (rtype, args);
22007 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
22008 enum ix86_builtin_func_type icode;
22010 icode = ix86_builtin_func_alias_base[index];
22011 type = ix86_get_builtin_func_type (icode);
22014 ix86_builtin_func_type_tab[(int) tcode] = type;
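
/* Illustrative trace (names of the table contents are assumptions about
   the generated .inc file): for a function code whose slice of
   ix86_builtin_func_args holds the return code followed by one V4SF
   argument code, the loop above conses the V4SF type onto void_list_node
   and build_function_type produces the tree for "ret (v4sf)".  Alias
   codes simply share the tree already built for their base code.  */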
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different
   ISAs but are waiting to be built until a function is declared to use
   that ISA.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];


/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores
   the function decl in the ix86_builtins array.  Returns the function decl,
   or NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
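
/* Usage sketch -- the mask/name/type/code quadruple below is copied from
   the bdesc_special_args table later in this file, not a new builtin:

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups",
		  V4SF_FTYPE_PCFLOAT, IX86_BUILTIN_LOADUPS);

   If SSE is already enabled the decl is created on the spot; otherwise the
   request is only parked in ix86_builtins_isa and the decl is materialized
   later by ix86_add_new_builtins once the ISA becomes available.  */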
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
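
/* Note: this is expected to run when ix86_isa_flags picks up new bits,
   e.g. after function-specific target options switch the ISA; only entries
   whose set_and_not_built_p flag was left true by def_builtin are
   materialized here, and each is built at most once.  */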
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
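
/* Reading one row of the tables below, e.g. the first bdesc_comi entry:
   __builtin_ia32_comieq is available under OPTION_MASK_ISA_SSE, expands
   through insn pattern CODE_FOR_sse_comi, is identified by
   IX86_BUILTIN_COMIEQSS, and tests the UNEQ comparison.  The final field
   is multiplexed: a BUILTIN_DESC_* bit in bdesc_comi (0 = no operand
   swap), a CC mode in the pcmp?str tables, and a function-type code in
   the argument tables.  */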
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};

23243 /* Builtins with variable number of arguments. */
23244 static const struct builtin_description bdesc_args[] =
23246 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23247 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23248 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23249 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23250 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23251 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23252 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23287 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
23291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
23293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
23295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
23299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23319 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23320 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23321 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23322 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23324 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23325 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23326 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23327 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23328 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23329 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23330 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23331 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23332 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23333 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23334 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23335 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23336 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23337 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23338 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23341 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23342 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23343 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23344 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23345 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23346 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
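
  /* SSE2 */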
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
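
  /* SSE2 MMX */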
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
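
  /* SSE3 */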
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
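
  /* SSSE3 */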
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
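
  /* SSE4.1 */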
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
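
  /* SSE4.1 */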
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
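
  /* SSE4.2 */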
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
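
  /* SSE4A */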
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
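
  /* AES */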
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
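
  /* PCLMUL */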
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
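
  /* AVX */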
23753 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23754 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23757 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23758 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23761 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23767 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23768 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23769 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23770 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23771 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23772 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23773 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23774 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23775 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23776 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23777 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23778 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
23781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
23782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
23783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
23785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
23788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
23789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
23799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
23800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
23801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
23802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
23803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
23804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
23806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
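/* Editor's note: two illustrative sketches, not part of the original file.
   The first shows how the ptest entries above surface to users: the EQ,
   LTU and GTU comparison codes stored in the table select the ZF, CF and
   "neither flag" result of a single vptest instruction.  The wrapper is
   hypothetical and assumes an -mavx compile; the signature follows
   INT_FTYPE_V4DI_V4DI_PTEST.  */
#if 0
typedef long long __v4di __attribute__ ((__vector_size__ (32)));

static int
mask_bits_all_clear (__v4di val, __v4di mask)
{
  /* Nonzero iff (val & mask) == 0, i.e. the ZF (EQ) variant.  */
  return __builtin_ia32_ptestz256 (val, mask);
}
#endif

/* The second sketch exercises the F16C entries: vcvtps2ph takes a rounding
   immediate (0 is assumed here to select round-to-nearest-even) and packs
   the halves into a V8HI, per V8HI_FTYPE_V4SF_INT.  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__ ((__vector_size__ (16)));

static __v4sf
half_precision_round_trip (__v4sf x)
{
  __v8hi h = __builtin_ia32_vcvtps2ph (x, 0);  /* narrow to binary16 */
  return __builtin_ia32_vcvtph2ps (h);         /* widen back to float */
}
#endif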
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
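/* Editor's note: an illustrative sketch, not part of the original file.
   Each XOP comparison entry above pairs one insn pattern with an rtx
   code; the expander turns that code into an extra operand of the
   xop_maskcmp pattern, so a single pattern serves eq/ne/lt/le/gt/ge.
   Hypothetical wrapper, assuming an -mxop compile.  */
#if 0
typedef char __v16qi __attribute__ ((__vector_size__ (16)));

static __v16qi
bytes_equal_mask (__v16qi a, __v16qi b)
{
  /* The EQ stored in the bdesc_multi_arg entry becomes the vpcomb
     comparison operand; all-ones lanes mark equal bytes.  */
  return __builtin_ia32_vpcomeqb (a, b);
}
#endif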
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }
  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
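  /* Editor's note: an illustrative sketch, not part of the original file.
     The loop above types the pcmpestrm form as returning V16QI and every
     other pcmpestr form as returning int; a hypothetical use of the
     index form, assuming -msse4.2 and imm8 0 (unsigned bytes,
     equal-any aggregation):  */
#if 0
  {
    typedef char __v16qi __attribute__ ((__vector_size__ (16)));
    __v16qi a = { 0 }, b = { 0 };
    int idx = __builtin_ia32_pcmpestri128 (a, 16, b, 16, 0);
    (void) idx;
  }
#endif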
  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);
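  /* Editor's note: an illustrative sketch, not part of the original file.
     The vec_init builtins registered above assemble an MMX vector from
     scalars; the element order shown is an assumption.  */
#if 0
  {
    typedef int __v2si __attribute__ ((__vector_size__ (8)));
    __v2si v = __builtin_ia32_vec_init_v2si (1, 2);	/* v = { 1, 2 } */
    (void) v;
  }
#endif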
  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
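  /* Editor's note: an illustrative sketch, not part of the original file.
     The vec_ext builtins pull one lane out of a vector; the lane number
     must fold to a constant, matching the _INT suffix in the type.  */
#if 0
  {
    typedef float __v4sf __attribute__ ((__vector_size__ (16)));
    __v4sf x = { 1.0f, 2.0f, 3.0f, 4.0f };
    float lane2 = __builtin_ia32_vec_ext_v4sf (x, 2);	/* 3.0f */
    (void) lane2;
  }
#endif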
  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
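  /* Editor's note: an illustrative sketch, not part of the original file.
     The vec_set builtins return a copy of the vector with one lane
     replaced, per V8HI_FTYPE_V8HI_HI_INT above.  */
#if 0
  {
    typedef short __v8hi __attribute__ ((__vector_size__ (16)));
    __v8hi v = { 0 };
    v = __builtin_ia32_vec_set_v8hi (v, 7, 0);	/* lane 0 becomes 7 */
    (void) v;
  }
#endif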
  /* Add FMA4 and XOP multi-arg builtin instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
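/* Editor's note: an illustrative sketch, not part of the original file.
   The six builtins registered above let one translation unit handle
   varargs under both 64-bit ABIs; a hypothetical ms_abi function built
   in a sysv_abi TU expands its va_start/va_end to the ms variants.  */
#if 0
static int __attribute__ ((ms_abi))
sum_ms_varargs (int n, ...)
{
  __builtin_ms_va_list ap;
  int i, s = 0;

  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; i++)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}
#endif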
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
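  /* Editor's note: an illustrative sketch, not part of the original file.
     After registration the reserved names are usable from C on the
     target: __float80 is the 80-bit x87 extended type, __float128 the
     128-bit TFmode type backed by libgcc's tf routines.  */
#if 0
  {
    __float80 e = 1.0L;
    __float128 q = 2.0;
    (void) e; (void) q;
  }
#endif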
  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal calls if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
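  /* Editor's note: an illustrative sketch, not part of the original file.
     Because the builtins above carry the library names __fabstf2 and
     __copysigntf3, a call like the one below can fall back to libgcc
     when SSE2 is unavailable.  */
#if 0
  {
    __float128 x = -3.0, ax;
    ax = __builtin_fabsq (x);
    (void) ax;
  }
#endif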
  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
	{
	  if (!CONST_INT_P (op))
	    {
	      error ("last argument must be an immediate");
	      return gen_reg_rtx (tmode);
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
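/* Editor's note: an illustrative sketch, not part of the original file.
   For the MULTI_ARG_2_*_IMM shapes the expander above insists on a
   CONST_INT last operand, so a rotate count like the one below must fold
   to a constant or the "last argument must be an immediate" error fires.
   Hypothetical wrapper, assuming an -mxop compile.  */
#if 0
typedef long long __v2di __attribute__ ((__vector_size__ (16)));

static __v2di
rotate_qwords_left3 (__v2di x)
{
  return __builtin_ia32_vprotqi (x, 3);
}
#endif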
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;
  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
25147 case V4SF_FTYPE_V4SF_VEC_MERGE:
25148 case V2DF_FTYPE_V2DF_VEC_MERGE:
25149 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25150 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25151 case V16QI_FTYPE_V16QI_V16QI:
25152 case V16QI_FTYPE_V8HI_V8HI:
25153 case V8QI_FTYPE_V8QI_V8QI:
25154 case V8QI_FTYPE_V4HI_V4HI:
25155 case V8HI_FTYPE_V8HI_V8HI:
25156 case V8HI_FTYPE_V16QI_V16QI:
25157 case V8HI_FTYPE_V4SI_V4SI:
25158 case V8SF_FTYPE_V8SF_V8SF:
25159 case V8SF_FTYPE_V8SF_V8SI:
25160 case V4SI_FTYPE_V4SI_V4SI:
25161 case V4SI_FTYPE_V8HI_V8HI:
25162 case V4SI_FTYPE_V4SF_V4SF:
25163 case V4SI_FTYPE_V2DF_V2DF:
25164 case V4HI_FTYPE_V4HI_V4HI:
25165 case V4HI_FTYPE_V8QI_V8QI:
25166 case V4HI_FTYPE_V2SI_V2SI:
25167 case V4DF_FTYPE_V4DF_V4DF:
25168 case V4DF_FTYPE_V4DF_V4DI:
25169 case V4SF_FTYPE_V4SF_V4SF:
25170 case V4SF_FTYPE_V4SF_V4SI:
25171 case V4SF_FTYPE_V4SF_V2SI:
25172 case V4SF_FTYPE_V4SF_V2DF:
25173 case V4SF_FTYPE_V4SF_DI:
25174 case V4SF_FTYPE_V4SF_SI:
25175 case V2DI_FTYPE_V2DI_V2DI:
25176 case V2DI_FTYPE_V16QI_V16QI:
25177 case V2DI_FTYPE_V4SI_V4SI:
25178 case V2DI_FTYPE_V2DI_V16QI:
25179 case V2DI_FTYPE_V2DF_V2DF:
25180 case V2SI_FTYPE_V2SI_V2SI:
25181 case V2SI_FTYPE_V4HI_V4HI:
25182 case V2SI_FTYPE_V2SF_V2SF:
25183 case V2DF_FTYPE_V2DF_V2DF:
25184 case V2DF_FTYPE_V2DF_V4SF:
25185 case V2DF_FTYPE_V2DF_V2DI:
25186 case V2DF_FTYPE_V2DF_DI:
25187 case V2DF_FTYPE_V2DF_SI:
25188 case V2SF_FTYPE_V2SF_V2SF:
25189 case V1DI_FTYPE_V1DI_V1DI:
25190 case V1DI_FTYPE_V8QI_V8QI:
25191 case V1DI_FTYPE_V2SI_V2SI:
25192 if (comparison == UNKNOWN)
25193 return ix86_expand_binop_builtin (icode, exp, target);
25196 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25197 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25198 gcc_assert (comparison != UNKNOWN);
25202 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25203 case V8HI_FTYPE_V8HI_SI_COUNT:
25204 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25205 case V4SI_FTYPE_V4SI_SI_COUNT:
25206 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25207 case V4HI_FTYPE_V4HI_SI_COUNT:
25208 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25209 case V2DI_FTYPE_V2DI_SI_COUNT:
25210 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25211 case V2SI_FTYPE_V2SI_SI_COUNT:
25212 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25213 case V1DI_FTYPE_V1DI_SI_COUNT:
25215 last_arg_count = true;
25217 case UINT64_FTYPE_UINT64_UINT64:
25218 case UINT_FTYPE_UINT_UINT:
25219 case UINT_FTYPE_UINT_USHORT:
25220 case UINT_FTYPE_UINT_UCHAR:
25221 case UINT16_FTYPE_UINT16_INT:
25222 case UINT8_FTYPE_UINT8_INT:
25225 case V2DI_FTYPE_V2DI_INT_CONVERT:
25228 nargs_constant = 1;
25230 case V8HI_FTYPE_V8HI_INT:
25231 case V8HI_FTYPE_V8SF_INT:
25232 case V8HI_FTYPE_V4SF_INT:
25233 case V8SF_FTYPE_V8SF_INT:
25234 case V4SI_FTYPE_V4SI_INT:
25235 case V4SI_FTYPE_V8SI_INT:
25236 case V4HI_FTYPE_V4HI_INT:
25237 case V4DF_FTYPE_V4DF_INT:
25238 case V4SF_FTYPE_V4SF_INT:
25239 case V4SF_FTYPE_V8SF_INT:
25240 case V2DI_FTYPE_V2DI_INT:
25241 case V2DF_FTYPE_V2DF_INT:
25242 case V2DF_FTYPE_V4DF_INT:
25244 nargs_constant = 1;
25246 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25247 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25248 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25249 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25250 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25253 case V16QI_FTYPE_V16QI_V16QI_INT:
25254 case V8HI_FTYPE_V8HI_V8HI_INT:
25255 case V8SI_FTYPE_V8SI_V8SI_INT:
25256 case V8SI_FTYPE_V8SI_V4SI_INT:
25257 case V8SF_FTYPE_V8SF_V8SF_INT:
25258 case V8SF_FTYPE_V8SF_V4SF_INT:
25259 case V4SI_FTYPE_V4SI_V4SI_INT:
25260 case V4DF_FTYPE_V4DF_V4DF_INT:
25261 case V4DF_FTYPE_V4DF_V2DF_INT:
25262 case V4SF_FTYPE_V4SF_V4SF_INT:
25263 case V2DI_FTYPE_V2DI_V2DI_INT:
25264 case V2DF_FTYPE_V2DF_V2DF_INT:
25266 nargs_constant = 1;
25268 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25271 nargs_constant = 1;
25273 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25276 nargs_constant = 1;
25278 case V2DI_FTYPE_V2DI_UINT_UINT:
25280 nargs_constant = 2;
25282 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25283 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25284 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25285 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25287 nargs_constant = 1;
25289 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25291 nargs_constant = 2;
25294 gcc_unreachable ();
25297 gcc_assert (nargs <= ARRAY_SIZE (args));
25299 if (comparison != UNKNOWN)
25301 gcc_assert (nargs == 2);
25302 return ix86_expand_sse_compare (d, exp, target, swap);
25305 if (rmode == VOIDmode || rmode == tmode)
25309 || GET_MODE (target) != tmode
25310 || !insn_p->operand[0].predicate (target, tmode))
25311 target = gen_reg_rtx (tmode);
25312 real_target = target;
25316 target = gen_reg_rtx (rmode);
25317 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25320 for (i = 0; i < nargs; i++)
25322 tree arg = CALL_EXPR_ARG (exp, i);
25323 rtx op = expand_normal (arg);
25324 enum machine_mode mode = insn_p->operand[i + 1].mode;
25325 bool match = insn_p->operand[i + 1].predicate (op, mode);
25327 if (last_arg_count && (i + 1) == nargs)
25329 /* SIMD shift insns take either an 8-bit immediate or a
25330 register as the count.  But the builtin functions take an int as
25331 the count.  If the count doesn't match, we put it in a register.  */
25334 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25335 if (!insn_p->operand[i + 1].predicate (op, mode))
25336 op = copy_to_reg (op);
25339 else if ((nargs - i) <= nargs_constant)
25344 case CODE_FOR_sse4_1_roundpd:
25345 case CODE_FOR_sse4_1_roundps:
25346 case CODE_FOR_sse4_1_roundsd:
25347 case CODE_FOR_sse4_1_roundss:
25348 case CODE_FOR_sse4_1_blendps:
25349 case CODE_FOR_avx_blendpd256:
25350 case CODE_FOR_avx_vpermilv4df:
25351 case CODE_FOR_avx_roundpd256:
25352 case CODE_FOR_avx_roundps256:
25353 error ("the last argument must be a 4-bit immediate");
25356 case CODE_FOR_sse4_1_blendpd:
25357 case CODE_FOR_avx_vpermilv2df:
25358 case CODE_FOR_xop_vpermil2v2df3:
25359 case CODE_FOR_xop_vpermil2v4sf3:
25360 case CODE_FOR_xop_vpermil2v4df3:
25361 case CODE_FOR_xop_vpermil2v8sf3:
25362 error ("the last argument must be a 2-bit immediate");
25365 case CODE_FOR_avx_vextractf128v4df:
25366 case CODE_FOR_avx_vextractf128v8sf:
25367 case CODE_FOR_avx_vextractf128v8si:
25368 case CODE_FOR_avx_vinsertf128v4df:
25369 case CODE_FOR_avx_vinsertf128v8sf:
25370 case CODE_FOR_avx_vinsertf128v8si:
25371 error ("the last argument must be a 1-bit immediate");
25374 case CODE_FOR_avx_cmpsdv2df3:
25375 case CODE_FOR_avx_cmpssv4sf3:
25376 case CODE_FOR_avx_cmppdv2df3:
25377 case CODE_FOR_avx_cmppsv4sf3:
25378 case CODE_FOR_avx_cmppdv4df3:
25379 case CODE_FOR_avx_cmppsv8sf3:
25380 error ("the last argument must be a 5-bit immediate");
25384 switch (nargs_constant)
25387 if ((nargs - i) == nargs_constant)
25389 error ("the next to last argument must be an 8-bit immediate");
25393 error ("the last argument must be an 8-bit immediate");
25396 gcc_unreachable ();
25403 if (VECTOR_MODE_P (mode))
25404 op = safe_vector_operand (op, mode);
25406 /* If we aren't optimizing, only allow one memory operand to be generated.  */
25408 if (memory_operand (op, mode))
25411 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25413 if (optimize || !match || num_memory > 1)
25414 op = copy_to_mode_reg (mode, op);
25418 op = copy_to_reg (op);
25419 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25424 args[i].mode = mode;
25430 pat = GEN_FCN (icode) (real_target, args[0].op);
25433 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25436 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25440 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25441 args[2].op, args[3].op);
25444 gcc_unreachable ();
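/* Hedged example of the immediate checking above (standard intrinsic
   names assumed, not defined here): _mm_blend_ps maps to
   __builtin_ia32_blendps with icode CODE_FOR_sse4_1_blendps, so

     __m128 r = _mm_blend_ps (a, b, 0x5);

   passes the 4-bit check, while a non-constant or out-of-range mask
   triggers the "must be a 4-bit immediate" diagnostic above.  */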
25454 /* Subroutine of ix86_expand_builtin to take care of special insns
25455 with a variable number of operands.  */
25458 ix86_expand_special_args_builtin (const struct builtin_description *d,
25459 tree exp, rtx target)
25463 unsigned int i, nargs, arg_adjust, memory;
25467 enum machine_mode mode;
25469 enum insn_code icode = d->icode;
25470 bool last_arg_constant = false;
25471 const struct insn_data_d *insn_p = &insn_data[icode];
25472 enum machine_mode tmode = insn_p->operand[0].mode;
25473 enum { load, store } klass;
25475 switch ((enum ix86_builtin_func_type) d->flag)
25477 case VOID_FTYPE_VOID:
25478 emit_insn (GEN_FCN (icode) (target));
25480 case VOID_FTYPE_UINT64:
25481 case VOID_FTYPE_UNSIGNED:
25487 case UINT64_FTYPE_VOID:
25488 case UNSIGNED_FTYPE_VOID:
25489 case UINT16_FTYPE_VOID:
25494 case UINT64_FTYPE_PUNSIGNED:
25495 case V2DI_FTYPE_PV2DI:
25496 case V32QI_FTYPE_PCCHAR:
25497 case V16QI_FTYPE_PCCHAR:
25498 case V8SF_FTYPE_PCV4SF:
25499 case V8SF_FTYPE_PCFLOAT:
25500 case V4SF_FTYPE_PCFLOAT:
25501 case V4DF_FTYPE_PCV2DF:
25502 case V4DF_FTYPE_PCDOUBLE:
25503 case V2DF_FTYPE_PCDOUBLE:
25504 case VOID_FTYPE_PVOID:
25509 case VOID_FTYPE_PV2SF_V4SF:
25510 case VOID_FTYPE_PV4DI_V4DI:
25511 case VOID_FTYPE_PV2DI_V2DI:
25512 case VOID_FTYPE_PCHAR_V32QI:
25513 case VOID_FTYPE_PCHAR_V16QI:
25514 case VOID_FTYPE_PFLOAT_V8SF:
25515 case VOID_FTYPE_PFLOAT_V4SF:
25516 case VOID_FTYPE_PDOUBLE_V4DF:
25517 case VOID_FTYPE_PDOUBLE_V2DF:
25518 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25519 case VOID_FTYPE_PINT_INT:
25522 /* Reserve memory operand for target. */
25523 memory = ARRAY_SIZE (args);
25525 case V4SF_FTYPE_V4SF_PCV2SF:
25526 case V2DF_FTYPE_V2DF_PCDOUBLE:
25531 case V8SF_FTYPE_PCV8SF_V8SF:
25532 case V4DF_FTYPE_PCV4DF_V4DF:
25533 case V4SF_FTYPE_PCV4SF_V4SF:
25534 case V2DF_FTYPE_PCV2DF_V2DF:
25539 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25540 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25541 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25542 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25545 /* Reserve memory operand for target. */
25546 memory = ARRAY_SIZE (args);
25548 case VOID_FTYPE_UINT_UINT_UINT:
25549 case VOID_FTYPE_UINT64_UINT_UINT:
25550 case UCHAR_FTYPE_UINT_UINT_UINT:
25551 case UCHAR_FTYPE_UINT64_UINT_UINT:
25554 memory = ARRAY_SIZE (args);
25555 last_arg_constant = true;
25558 gcc_unreachable ();
25561 gcc_assert (nargs <= ARRAY_SIZE (args));
25563 if (klass == store)
25565 arg = CALL_EXPR_ARG (exp, 0);
25566 op = expand_normal (arg);
25567 gcc_assert (target == 0);
25569 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25571 target = force_reg (tmode, op);
25579 || GET_MODE (target) != tmode
25580 || !insn_p->operand[0].predicate (target, tmode))
25581 target = gen_reg_rtx (tmode);
25584 for (i = 0; i < nargs; i++)
25586 enum machine_mode mode = insn_p->operand[i + 1].mode;
25589 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25590 op = expand_normal (arg);
25591 match = insn_p->operand[i + 1].predicate (op, mode);
25593 if (last_arg_constant && (i + 1) == nargs)
25597 if (icode == CODE_FOR_lwp_lwpvalsi3
25598 || icode == CODE_FOR_lwp_lwpinssi3
25599 || icode == CODE_FOR_lwp_lwpvaldi3
25600 || icode == CODE_FOR_lwp_lwpinsdi3)
25601 error ("the last argument must be a 32-bit immediate");
25603 error ("the last argument must be an 8-bit immediate");
25611 /* This must be the memory operand. */
25612 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25613 gcc_assert (GET_MODE (op) == mode
25614 || GET_MODE (op) == VOIDmode);
25618 /* This must be a register.  */
25619 if (VECTOR_MODE_P (mode))
25620 op = safe_vector_operand (op, mode);
25622 gcc_assert (GET_MODE (op) == mode
25623 || GET_MODE (op) == VOIDmode);
25624 op = copy_to_mode_reg (mode, op);
25629 args[i].mode = mode;
25635 pat = GEN_FCN (icode) (target);
25638 pat = GEN_FCN (icode) (target, args[0].op);
25641 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25644 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25647 gcc_unreachable ();
25653 return klass == store ? 0 : target;
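/* Hedged illustration of the load/store classes above (intrinsic names
   assumed): __builtin_ia32_loadupd (_mm_loadu_pd) has type
   V2DF_FTYPE_PCDOUBLE, so klass == load and its pointer argument is
   wrapped in a MEM, while __builtin_ia32_storeupd (_mm_storeu_pd),
   type VOID_FTYPE_PDOUBLE_V2DF, makes the MEM the insn target and the
   function returns 0.  */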
25656 /* Return the integer constant in ARG. Constrain it to be in the range
25657 of the subparts of VEC_TYPE; issue an error if not. */
25660 get_element_number (tree vec_type, tree arg)
25662 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25664 if (!host_integerp (arg, 1)
25665 || (elt = tree_low_cst (arg, 1), elt > max))
25667 error ("selector must be an integer constant in the range 0..%wi", max);
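/* Hedged illustration: for a V4SF vector the valid selectors are 0..3,
   so in

     __v4sf v = { 1, 2, 3, 4 };
     float ok  = __builtin_ia32_vec_ext_v4sf (v, 2);
     float bad = __builtin_ia32_vec_ext_v4sf (v, 7);

   the second extraction is rejected by the range error above.  */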
25674 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25675 ix86_expand_vector_init. We DO have language-level syntax for this, in
25676 the form of (type){ init-list }. Except that since we can't place emms
25677 instructions from inside the compiler, we can't allow the use of MMX
25678 registers unless the user explicitly asks for it. So we do *not* define
25679 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25680 we have builtins invoked by mmintrin.h that give us license to emit
25681 these sorts of instructions. */
25684 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25686 enum machine_mode tmode = TYPE_MODE (type);
25687 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25688 int i, n_elt = GET_MODE_NUNITS (tmode);
25689 rtvec v = rtvec_alloc (n_elt);
25691 gcc_assert (VECTOR_MODE_P (tmode));
25692 gcc_assert (call_expr_nargs (exp) == n_elt);
25694 for (i = 0; i < n_elt; ++i)
25696 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25697 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25700 if (!target || !register_operand (target, tmode))
25701 target = gen_reg_rtx (tmode);
25703 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
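/* Hedged example (assumes mmintrin.h): _mm_setr_pi16 expands through
   IX86_BUILTIN_VEC_INIT_V4HI, so

     __m64 v = _mm_setr_pi16 (1, 2, 3, 4);

   arrives here with n_elt == 4, each call argument becoming one RTVEC
   element handed to ix86_expand_vector_init.  */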
25707 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25708 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25709 had a language-level syntax for referencing vector elements. */
25712 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25714 enum machine_mode tmode, mode0;
25719 arg0 = CALL_EXPR_ARG (exp, 0);
25720 arg1 = CALL_EXPR_ARG (exp, 1);
25722 op0 = expand_normal (arg0);
25723 elt = get_element_number (TREE_TYPE (arg0), arg1);
25725 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25726 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25727 gcc_assert (VECTOR_MODE_P (mode0));
25729 op0 = force_reg (mode0, op0);
25731 if (optimize || !target || !register_operand (target, tmode))
25732 target = gen_reg_rtx (tmode);
25734 ix86_expand_vector_extract (true, target, op0, elt);
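/* Hedged example (assumes emmintrin.h): _mm_extract_epi16 maps to
   IX86_BUILTIN_VEC_EXT_V8HI, so

     int x = _mm_extract_epi16 (v, 3);

   lands here with elt == 3 after get_element_number has checked the
   selector against the eight V8HI subparts.  */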
25739 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25740 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25741 a language-level syntax for referencing vector elements. */
25744 ix86_expand_vec_set_builtin (tree exp)
25746 enum machine_mode tmode, mode1;
25747 tree arg0, arg1, arg2;
25749 rtx op0, op1, target;
25751 arg0 = CALL_EXPR_ARG (exp, 0);
25752 arg1 = CALL_EXPR_ARG (exp, 1);
25753 arg2 = CALL_EXPR_ARG (exp, 2);
25755 tmode = TYPE_MODE (TREE_TYPE (arg0));
25756 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25757 gcc_assert (VECTOR_MODE_P (tmode));
25759 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25760 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25761 elt = get_element_number (TREE_TYPE (arg0), arg2);
25763 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25764 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25766 op0 = force_reg (tmode, op0);
25767 op1 = force_reg (mode1, op1);
25769 /* OP0 is the source of these builtin functions and shouldn't be
25770 modified. Create a copy, use it and return it as target. */
25771 target = gen_reg_rtx (tmode);
25772 emit_move_insn (target, op0);
25773 ix86_expand_vector_set (true, target, op1, elt);
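/* Hedged example (assumes emmintrin.h): _mm_insert_epi16 maps to
   IX86_BUILTIN_VEC_SET_V8HI, so

     __m128i r = _mm_insert_epi16 (v, 42, 5);

   returns the fresh copy created above while leaving V itself
   unmodified, matching the intrinsic's value semantics.  */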
25778 /* Expand an expression EXP that calls a built-in function,
25779 with result going to TARGET if that's convenient
25780 (and in mode MODE if that's convenient).
25781 SUBTARGET may be used as the target for computing one of EXP's operands.
25782 IGNORE is nonzero if the value is to be ignored. */
25785 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25786 enum machine_mode mode ATTRIBUTE_UNUSED,
25787 int ignore ATTRIBUTE_UNUSED)
25789 const struct builtin_description *d;
25791 enum insn_code icode;
25792 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25793 tree arg0, arg1, arg2;
25794 rtx op0, op1, op2, pat;
25795 enum machine_mode mode0, mode1, mode2;
25796 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25798 /* Determine whether the builtin function is available under the current ISA.
25799 Originally the builtin was not created if it wasn't applicable to the
25800 current ISA based on the command line switches. With function specific
25801 options, we need to check in the context of the function making the call
25802 whether it is supported. */
25803 if (ix86_builtins_isa[fcode].isa
25804 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25806 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25807 NULL, NULL, false);
25810 error ("%qE needs unknown isa option", fndecl);
25813 gcc_assert (opts != NULL);
25814 error ("%qE needs isa option %s", fndecl, opts);
25822 case IX86_BUILTIN_MASKMOVQ:
25823 case IX86_BUILTIN_MASKMOVDQU:
25824 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25825 ? CODE_FOR_mmx_maskmovq
25826 : CODE_FOR_sse2_maskmovdqu);
25827 /* Note the arg order is different from the operand order. */
25828 arg1 = CALL_EXPR_ARG (exp, 0);
25829 arg2 = CALL_EXPR_ARG (exp, 1);
25830 arg0 = CALL_EXPR_ARG (exp, 2);
25831 op0 = expand_normal (arg0);
25832 op1 = expand_normal (arg1);
25833 op2 = expand_normal (arg2);
25834 mode0 = insn_data[icode].operand[0].mode;
25835 mode1 = insn_data[icode].operand[1].mode;
25836 mode2 = insn_data[icode].operand[2].mode;
25838 op0 = force_reg (Pmode, op0);
25839 op0 = gen_rtx_MEM (mode1, op0);
25841 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25842 op0 = copy_to_mode_reg (mode0, op0);
25843 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25844 op1 = copy_to_mode_reg (mode1, op1);
25845 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25846 op2 = copy_to_mode_reg (mode2, op2);
25847 pat = GEN_FCN (icode) (op0, op1, op2);
25853 case IX86_BUILTIN_LDMXCSR:
25854 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25855 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25856 emit_move_insn (target, op0);
25857 emit_insn (gen_sse_ldmxcsr (target));
25860 case IX86_BUILTIN_STMXCSR:
25861 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25862 emit_insn (gen_sse_stmxcsr (target));
25863 return copy_to_mode_reg (SImode, target);
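/* Hedged illustration: these two cases back _mm_setcsr and _mm_getcsr
   (xmmintrin.h); both bounce through an SImode stack slot because
   ldmxcsr and stmxcsr accept only memory operands:

     unsigned int csr = _mm_getcsr ();
     _mm_setcsr (csr | 0x8040);

   (0x8040 would set the FTZ and DAZ bits; shown only as an example.)  */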
25865 case IX86_BUILTIN_CLFLUSH:
25866 arg0 = CALL_EXPR_ARG (exp, 0);
25867 op0 = expand_normal (arg0);
25868 icode = CODE_FOR_sse2_clflush;
25869 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25870 op0 = copy_to_mode_reg (Pmode, op0);
25872 emit_insn (gen_sse2_clflush (op0));
25875 case IX86_BUILTIN_MONITOR:
25876 arg0 = CALL_EXPR_ARG (exp, 0);
25877 arg1 = CALL_EXPR_ARG (exp, 1);
25878 arg2 = CALL_EXPR_ARG (exp, 2);
25879 op0 = expand_normal (arg0);
25880 op1 = expand_normal (arg1);
25881 op2 = expand_normal (arg2);
25883 op0 = copy_to_mode_reg (Pmode, op0);
25885 op1 = copy_to_mode_reg (SImode, op1);
25887 op2 = copy_to_mode_reg (SImode, op2);
25888 emit_insn (ix86_gen_monitor (op0, op1, op2));
25891 case IX86_BUILTIN_MWAIT:
25892 arg0 = CALL_EXPR_ARG (exp, 0);
25893 arg1 = CALL_EXPR_ARG (exp, 1);
25894 op0 = expand_normal (arg0);
25895 op1 = expand_normal (arg1);
25897 op0 = copy_to_mode_reg (SImode, op0);
25899 op1 = copy_to_mode_reg (SImode, op1);
25900 emit_insn (gen_sse3_mwait (op0, op1));
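/* Hedged sketch of the corresponding user-level pairing (assumes the
   SSE3 intrinsics from pmmintrin.h):

     _mm_monitor (addr, 0, 0);
     _mm_mwait (0, 0);

   The MONITOR operands above are forced into Pmode and SImode
   registers as the patterns require.  */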
25903 case IX86_BUILTIN_VEC_INIT_V2SI:
25904 case IX86_BUILTIN_VEC_INIT_V4HI:
25905 case IX86_BUILTIN_VEC_INIT_V8QI:
25906 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25908 case IX86_BUILTIN_VEC_EXT_V2DF:
25909 case IX86_BUILTIN_VEC_EXT_V2DI:
25910 case IX86_BUILTIN_VEC_EXT_V4SF:
25911 case IX86_BUILTIN_VEC_EXT_V4SI:
25912 case IX86_BUILTIN_VEC_EXT_V8HI:
25913 case IX86_BUILTIN_VEC_EXT_V2SI:
25914 case IX86_BUILTIN_VEC_EXT_V4HI:
25915 case IX86_BUILTIN_VEC_EXT_V16QI:
25916 return ix86_expand_vec_ext_builtin (exp, target);
25918 case IX86_BUILTIN_VEC_SET_V2DI:
25919 case IX86_BUILTIN_VEC_SET_V4SF:
25920 case IX86_BUILTIN_VEC_SET_V4SI:
25921 case IX86_BUILTIN_VEC_SET_V8HI:
25922 case IX86_BUILTIN_VEC_SET_V4HI:
25923 case IX86_BUILTIN_VEC_SET_V16QI:
25924 return ix86_expand_vec_set_builtin (exp);
25926 case IX86_BUILTIN_VEC_PERM_V2DF:
25927 case IX86_BUILTIN_VEC_PERM_V4SF:
25928 case IX86_BUILTIN_VEC_PERM_V2DI:
25929 case IX86_BUILTIN_VEC_PERM_V4SI:
25930 case IX86_BUILTIN_VEC_PERM_V8HI:
25931 case IX86_BUILTIN_VEC_PERM_V16QI:
25932 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25933 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25934 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25935 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25936 case IX86_BUILTIN_VEC_PERM_V4DF:
25937 case IX86_BUILTIN_VEC_PERM_V8SF:
25938 return ix86_expand_vec_perm_builtin (exp);
25940 case IX86_BUILTIN_INFQ:
25941 case IX86_BUILTIN_HUGE_VALQ:
25943 REAL_VALUE_TYPE inf;
25947 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25949 tmp = validize_mem (force_const_mem (mode, tmp));
25952 target = gen_reg_rtx (mode);
25954 emit_move_insn (target, tmp);
25958 case IX86_BUILTIN_LLWPCB:
25959 arg0 = CALL_EXPR_ARG (exp, 0);
25960 op0 = expand_normal (arg0);
25961 icode = CODE_FOR_lwp_llwpcb;
25962 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25963 op0 = copy_to_mode_reg (Pmode, op0);
25964 emit_insn (gen_lwp_llwpcb (op0));
25967 case IX86_BUILTIN_SLWPCB:
25968 icode = CODE_FOR_lwp_slwpcb;
25970 || !insn_data[icode].operand[0].predicate (target, Pmode))
25971 target = gen_reg_rtx (Pmode);
25972 emit_insn (gen_lwp_slwpcb (target));
25979 for (i = 0, d = bdesc_special_args;
25980 i < ARRAY_SIZE (bdesc_special_args);
25982 if (d->code == fcode)
25983 return ix86_expand_special_args_builtin (d, exp, target);
25985 for (i = 0, d = bdesc_args;
25986 i < ARRAY_SIZE (bdesc_args);
25988 if (d->code == fcode)
25991 case IX86_BUILTIN_FABSQ:
25992 case IX86_BUILTIN_COPYSIGNQ:
25994 /* Emit a normal call if SSE2 isn't available. */
25995 return expand_call (exp, target, ignore);
25997 return ix86_expand_args_builtin (d, exp, target);
26000 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26001 if (d->code == fcode)
26002 return ix86_expand_sse_comi (d, exp, target);
26004 for (i = 0, d = bdesc_pcmpestr;
26005 i < ARRAY_SIZE (bdesc_pcmpestr);
26007 if (d->code == fcode)
26008 return ix86_expand_sse_pcmpestr (d, exp, target);
26010 for (i = 0, d = bdesc_pcmpistr;
26011 i < ARRAY_SIZE (bdesc_pcmpistr);
26013 if (d->code == fcode)
26014 return ix86_expand_sse_pcmpistr (d, exp, target);
26016 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26017 if (d->code == fcode)
26018 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
26019 (enum ix86_builtin_func_type)
26020 d->flag, d->comparison);
26022 gcc_unreachable ();
26025 /* Returns a function decl for a vectorized version of the builtin function
26026 with builtin function code FN and the result vector type TYPE, or NULL_TREE
26027 if it is not available. */
26030 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
26033 enum machine_mode in_mode, out_mode;
26035 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26037 if (TREE_CODE (type_out) != VECTOR_TYPE
26038 || TREE_CODE (type_in) != VECTOR_TYPE
26039 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
26042 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26043 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26044 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26045 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26049 case BUILT_IN_SQRT:
26050 if (out_mode == DFmode && in_mode == DFmode)
26052 if (out_n == 2 && in_n == 2)
26053 return ix86_builtins[IX86_BUILTIN_SQRTPD];
26054 else if (out_n == 4 && in_n == 4)
26055 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
26059 case BUILT_IN_SQRTF:
26060 if (out_mode == SFmode && in_mode == SFmode)
26062 if (out_n == 4 && in_n == 4)
26063 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
26064 else if (out_n == 8 && in_n == 8)
26065 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
26069 case BUILT_IN_LRINT:
26070 if (out_mode == SImode && out_n == 4
26071 && in_mode == DFmode && in_n == 2)
26072 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
26075 case BUILT_IN_LRINTF:
26076 if (out_mode == SImode && in_mode == SFmode)
26078 if (out_n == 4 && in_n == 4)
26079 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26080 else if (out_n == 8 && in_n == 8)
26081 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
26085 case BUILT_IN_COPYSIGN:
26086 if (out_mode == DFmode && in_mode == DFmode)
26088 if (out_n == 2 && in_n == 2)
26089 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26090 else if (out_n == 4 && in_n == 4)
26091 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
26095 case BUILT_IN_COPYSIGNF:
26096 if (out_mode == SFmode && in_mode == SFmode)
26098 if (out_n == 4 && in_n == 4)
26099 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26100 else if (out_n == 8 && in_n == 8)
26101 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
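/* Hedged example of what this hook enables: with -O3 -msse2, a loop
   such as

     void f (double *a, const double *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = __builtin_sqrt (b[i]);
     }

   lets the vectorizer replace BUILT_IN_SQRT with IX86_BUILTIN_SQRTPD
   via the out_n == 2 && in_n == 2 case above.  */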
26109 /* Dispatch to a handler for a vectorization library. */
26110 if (ix86_veclib_handler)
26111 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26117 /* Handler for an SVML-style interface to
26118 a library with vectorized intrinsics. */
26121 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26124 tree fntype, new_fndecl, args;
26127 enum machine_mode el_mode, in_mode;
26130 /* SVML is suitable for unsafe math only.  */
26131 if (!flag_unsafe_math_optimizations)
26134 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26135 n = TYPE_VECTOR_SUBPARTS (type_out);
26136 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26137 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26138 if (el_mode != in_mode
26146 case BUILT_IN_LOG10:
26148 case BUILT_IN_TANH:
26150 case BUILT_IN_ATAN:
26151 case BUILT_IN_ATAN2:
26152 case BUILT_IN_ATANH:
26153 case BUILT_IN_CBRT:
26154 case BUILT_IN_SINH:
26156 case BUILT_IN_ASINH:
26157 case BUILT_IN_ASIN:
26158 case BUILT_IN_COSH:
26160 case BUILT_IN_ACOSH:
26161 case BUILT_IN_ACOS:
26162 if (el_mode != DFmode || n != 2)
26166 case BUILT_IN_EXPF:
26167 case BUILT_IN_LOGF:
26168 case BUILT_IN_LOG10F:
26169 case BUILT_IN_POWF:
26170 case BUILT_IN_TANHF:
26171 case BUILT_IN_TANF:
26172 case BUILT_IN_ATANF:
26173 case BUILT_IN_ATAN2F:
26174 case BUILT_IN_ATANHF:
26175 case BUILT_IN_CBRTF:
26176 case BUILT_IN_SINHF:
26177 case BUILT_IN_SINF:
26178 case BUILT_IN_ASINHF:
26179 case BUILT_IN_ASINF:
26180 case BUILT_IN_COSHF:
26181 case BUILT_IN_COSF:
26182 case BUILT_IN_ACOSHF:
26183 case BUILT_IN_ACOSF:
26184 if (el_mode != SFmode || n != 4)
26192 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26194 if (fn == BUILT_IN_LOGF)
26195 strcpy (name, "vmlsLn4");
26196 else if (fn == BUILT_IN_LOG)
26197 strcpy (name, "vmldLn2");
26200 sprintf (name, "vmls%s", bname+10);
26201 name[strlen (name)-1] = '4';
26204 sprintf (name, "vmld%s2", bname+10);
26206 /* Convert to uppercase. */
26210 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26211 args = TREE_CHAIN (args))
26215 fntype = build_function_type_list (type_out, type_in, NULL);
26217 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26219 /* Build a function declaration for the vectorized function. */
26220 new_fndecl = build_decl (BUILTINS_LOCATION,
26221 FUNCTION_DECL, get_identifier (name), fntype);
26222 TREE_PUBLIC (new_fndecl) = 1;
26223 DECL_EXTERNAL (new_fndecl) = 1;
26224 DECL_IS_NOVOPS (new_fndecl) = 1;
26225 TREE_READONLY (new_fndecl) = 1;
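/* Worked (hedged) example of the mangling above: for BUILT_IN_SINF
   with n == 4, bname is "__builtin_sinf", so bname+10 is "sinf";
   "vmls%s" yields "vmlssinf", the trailing character becomes '4', and
   uppercasing the stem's first letter gives "vmlsSin4".  The double
   variant of sin comes out as "vmldSin2".  */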
26230 /* Handler for an ACML-style interface to
26231 a library with vectorized intrinsics. */
26234 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26236 char name[20] = "__vr.._";
26237 tree fntype, new_fndecl, args;
26240 enum machine_mode el_mode, in_mode;
26243 /* The ACML is 64-bit only and suitable for unsafe math only, as
26244 it does not correctly support parts of IEEE with the required
26245 precision, such as denormals.  */
26247 || !flag_unsafe_math_optimizations)
26250 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26251 n = TYPE_VECTOR_SUBPARTS (type_out);
26252 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26253 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26254 if (el_mode != in_mode
26264 case BUILT_IN_LOG2:
26265 case BUILT_IN_LOG10:
26268 if (el_mode != DFmode
26273 case BUILT_IN_SINF:
26274 case BUILT_IN_COSF:
26275 case BUILT_IN_EXPF:
26276 case BUILT_IN_POWF:
26277 case BUILT_IN_LOGF:
26278 case BUILT_IN_LOG2F:
26279 case BUILT_IN_LOG10F:
26282 if (el_mode != SFmode
26291 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26292 sprintf (name + 7, "%s", bname+10);
26295 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26296 args = TREE_CHAIN (args))
26300 fntype = build_function_type_list (type_out, type_in, NULL);
26302 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26304 /* Build a function declaration for the vectorized function. */
26305 new_fndecl = build_decl (BUILTINS_LOCATION,
26306 FUNCTION_DECL, get_identifier (name), fntype);
26307 TREE_PUBLIC (new_fndecl) = 1;
26308 DECL_EXTERNAL (new_fndecl) = 1;
26309 DECL_IS_NOVOPS (new_fndecl) = 1;
26310 TREE_READONLY (new_fndecl) = 1;
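/* Hedged worked example of the ACML scheme: NAME starts as "__vr.._",
   the (elided) mode setup fills in the dots, and the suffix copy above
   appends the stem, so BUILT_IN_SINF with n == 4 becomes "__vrs4_sinf"
   and BUILT_IN_SIN with n == 2 becomes "__vrd2_sin", matching ACML's
   vector math entry points.  */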
26316 /* Returns a decl of a function that implements conversion of an integer vector
26317 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26318 are the types involved when converting according to CODE.
26319 Return NULL_TREE if it is not available. */
26322 ix86_vectorize_builtin_conversion (unsigned int code,
26323 tree dest_type, tree src_type)
26331 switch (TYPE_MODE (src_type))
26334 switch (TYPE_MODE (dest_type))
26337 return (TYPE_UNSIGNED (src_type)
26338 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26339 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26341 return (TYPE_UNSIGNED (src_type)
26343 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26349 switch (TYPE_MODE (dest_type))
26352 return (TYPE_UNSIGNED (src_type)
26354 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
26363 case FIX_TRUNC_EXPR:
26364 switch (TYPE_MODE (dest_type))
26367 switch (TYPE_MODE (src_type))
26370 return (TYPE_UNSIGNED (dest_type)
26372 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26374 return (TYPE_UNSIGNED (dest_type)
26376 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26383 switch (TYPE_MODE (src_type))
26386 return (TYPE_UNSIGNED (dest_type)
26388 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26405 /* Returns a decl for a target-specific builtin that implements
26406 the reciprocal of the function, or NULL_TREE if not available.  */
26409 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26410 bool sqrt ATTRIBUTE_UNUSED)
26412 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26413 && flag_finite_math_only && !flag_trapping_math
26414 && flag_unsafe_math_optimizations))
26418 /* Machine dependent builtins. */
26421 /* Vectorized version of sqrt to rsqrt conversion. */
26422 case IX86_BUILTIN_SQRTPS_NR:
26423 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26425 case IX86_BUILTIN_SQRTPS_NR256:
26426 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
26432 /* Normal builtins. */
26435 /* Sqrt to rsqrt conversion. */
26436 case BUILT_IN_SQRTF:
26437 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26444 /* Helper for avx_vpermilps256_operand et al. This is also used by
26445 the expansion functions to turn the parallel back into a mask.
26446 The return value is 0 for no match and the imm8+1 for a match. */
26449 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
26451 unsigned i, nelt = GET_MODE_NUNITS (mode);
26453 unsigned char ipar[8];
26455 if (XVECLEN (par, 0) != (int) nelt)
26458 /* Validate that all of the elements are constants, and not totally
26459 out of range. Copy the data into an integral array to make the
26460 subsequent checks easier. */
26461 for (i = 0; i < nelt; ++i)
26463 rtx er = XVECEXP (par, 0, i);
26464 unsigned HOST_WIDE_INT ei;
26466 if (!CONST_INT_P (er))
26477 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane.  */
26479 for (i = 0; i < 2; ++i)
26483 mask |= ipar[i] << i;
26485 for (i = 2; i < 4; ++i)
26489 mask |= (ipar[i] - 2) << i;
26494 /* In the 256-bit SFmode case, we have full freedom of movement
26495 within the low 128-bit lane, but the high 128-bit lane must
26496 mirror the exact same pattern. */
26497 for (i = 0; i < 4; ++i)
26498 if (ipar[i] + 4 != ipar[i + 4])
26505 /* In the 128-bit case, we have full freedom in the placement of
26506 the elements from the source operand. */
26507 for (i = 0; i < nelt; ++i)
26508 mask |= ipar[i] << (i * (nelt / 2));
26512 gcc_unreachable ();
26515 /* Make sure success has a non-zero value by adding one. */
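/* Hedged worked example for the 128-bit V4SF case: a parallel
   selecting elements [1 0 3 2] packs as 1<<0 | 0<<2 | 3<<4 | 2<<6
   == 0xb1, the vpermilps-style immediate, and the function returns
   0xb1 + 1 so that 0 still means "no match".  */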
26519 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26520 the expansion functions to turn the parallel back into a mask.
26521 The return value is 0 for no match and the imm8+1 for a match. */
26524 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26526 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26528 unsigned char ipar[8];
26530 if (XVECLEN (par, 0) != (int) nelt)
26533 /* Validate that all of the elements are constants, and not totally
26534 out of range. Copy the data into an integral array to make the
26535 subsequent checks easier. */
26536 for (i = 0; i < nelt; ++i)
26538 rtx er = XVECEXP (par, 0, i);
26539 unsigned HOST_WIDE_INT ei;
26541 if (!CONST_INT_P (er))
26544 if (ei >= 2 * nelt)
26549 /* Validate that each half of the permute selects a contiguous half.  */
26550 for (i = 0; i < nelt2 - 1; ++i)
26551 if (ipar[i] + 1 != ipar[i + 1])
26553 for (i = nelt2; i < nelt - 1; ++i)
26554 if (ipar[i] + 1 != ipar[i + 1])
26557 /* Reconstruct the mask. */
26558 for (i = 0; i < 2; ++i)
26560 unsigned e = ipar[i * nelt2];
26564 mask |= e << (i * 4);
26567 /* Make sure success has a non-zero value by adding one. */
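/* Hedged worked example for V4DF: the parallel [0 1 6 7] selects the
   low lane of operand 0 and the high lane of operand 1; each half
   index divides by nelt2 == 2, giving mask == 0 | (3 << 4) == 0x30,
   the familiar vperm2f128 immediate, returned as 0x31.  */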
26572 /* Store OPERAND to memory after reload has completed.  This means
26573 that we can't easily use assign_stack_local. */
26575 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26579 gcc_assert (reload_completed);
26580 if (ix86_using_red_zone ())
26582 result = gen_rtx_MEM (mode,
26583 gen_rtx_PLUS (Pmode,
26585 GEN_INT (-RED_ZONE_SIZE)));
26586 emit_move_insn (result, operand);
26588 else if (TARGET_64BIT)
26594 operand = gen_lowpart (DImode, operand);
26598 gen_rtx_SET (VOIDmode,
26599 gen_rtx_MEM (DImode,
26600 gen_rtx_PRE_DEC (DImode,
26601 stack_pointer_rtx)),
26605 gcc_unreachable ();
26607 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26616 split_double_mode (mode, &operand, 1, operands, operands + 1);
26618 gen_rtx_SET (VOIDmode,
26619 gen_rtx_MEM (SImode,
26620 gen_rtx_PRE_DEC (Pmode,
26621 stack_pointer_rtx)),
26624 gen_rtx_SET (VOIDmode,
26625 gen_rtx_MEM (SImode,
26626 gen_rtx_PRE_DEC (Pmode,
26627 stack_pointer_rtx)),
26632 /* Store HImodes as SImodes. */
26633 operand = gen_lowpart (SImode, operand);
26637 gen_rtx_SET (VOIDmode,
26638 gen_rtx_MEM (GET_MODE (operand),
26639 gen_rtx_PRE_DEC (SImode,
26640 stack_pointer_rtx)),
26644 gcc_unreachable ();
26646 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26651 /* Free the operand from memory.  */
26653 ix86_free_from_memory (enum machine_mode mode)
26655 if (!ix86_using_red_zone ())
26659 if (mode == DImode || TARGET_64BIT)
26663 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26664 to a pop or add instruction if registers are available.  */
26665 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26666 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26671 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26672 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same.  */
26674 static const reg_class_t *
26675 i386_ira_cover_classes (void)
26677 static const reg_class_t sse_fpmath_classes[] = {
26678 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26680 static const reg_class_t no_sse_fpmath_classes[] = {
26681 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26684 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26687 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
26689 Put float CONST_DOUBLE in the constant pool instead of fp regs.
26690 QImode must go into class Q_REGS.
26691 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26692 movdf to do mem-to-mem moves through integer regs. */
26695 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
26697 enum machine_mode mode = GET_MODE (x);
26699 /* We're only allowed to return a subclass of CLASS. Many of the
26700 following checks fail for NO_REGS, so eliminate that early. */
26701 if (regclass == NO_REGS)
26704 /* All classes can load zeros. */
26705 if (x == CONST0_RTX (mode))
26708 /* Force constants into memory if we are loading a (nonzero) constant into
26709 an MMX or SSE register. This is because there are no MMX/SSE instructions
26710 to load from a constant. */
26712 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26715 /* Prefer SSE regs only, if we can use them for math. */
26716 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26717 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26719 /* Floating-point constants need more complex checks. */
26720 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26722 /* General regs can load everything. */
26723 if (reg_class_subset_p (regclass, GENERAL_REGS))
26726 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26727 zero above. We only want to wind up preferring 80387 registers if
26728 we plan on doing computation with them. */
26730 && standard_80387_constant_p (x))
26732 /* Limit class to non-SSE.  */
26733 if (regclass == FLOAT_SSE_REGS)
26735 if (regclass == FP_TOP_SSE_REGS)
26737 if (regclass == FP_SECOND_SSE_REGS)
26738 return FP_SECOND_REG;
26739 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26746 /* Generally when we see PLUS here, it's the function invariant
26747 (plus soft-fp const_int), which can only be computed into general regs.  */
26749 if (GET_CODE (x) == PLUS)
26750 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26752 /* QImode constants are easy to load, but non-constant QImode data
26753 must go into Q_REGS. */
26754 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26756 if (reg_class_subset_p (regclass, Q_REGS))
26758 if (reg_class_subset_p (Q_REGS, regclass))
26766 /* Discourage putting floating-point values in SSE registers unless
26767 SSE math is being used, and likewise for the 387 registers. */
26769 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
26771 enum machine_mode mode = GET_MODE (x);
26773 /* Restrict the output reload class to the register bank that we are doing
26774 math on. If we would like not to return a subset of CLASS, reject this
26775 alternative: if reload cannot do this, it will still use its choice. */
26776 mode = GET_MODE (x);
26777 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26778 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26780 if (X87_FLOAT_MODE_P (mode))
26782 if (regclass == FP_TOP_SSE_REGS)
26784 else if (regclass == FP_SECOND_SSE_REGS)
26785 return FP_SECOND_REG;
26787 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26794 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26795 enum machine_mode mode,
26796 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26798 /* QImode spills from non-QI registers require an
26799 intermediate register on 32-bit targets.  */
26800 if (!in_p && mode == QImode && !TARGET_64BIT
26801 && (rclass == GENERAL_REGS
26802 || rclass == LEGACY_REGS
26803 || rclass == INDEX_REGS))
26812 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26813 regno = true_regnum (x);
26815 /* Return Q_REGS if the operand is in memory. */
26823 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26826 ix86_class_likely_spilled_p (reg_class_t rclass)
26837 case SSE_FIRST_REG:
26839 case FP_SECOND_REG:
26849 /* If we are copying between general and FP registers, we need a memory
26850 location. The same is true for SSE and MMX registers.
26852 To optimize register_move_cost performance, allow inline variant.
26854 The macro can't work reliably when one of the CLASSES is a class containing
26855 registers from multiple units (SSE, MMX, integer).  We avoid this by never
26856 combining those units in a single alternative in the machine description.
26857 Ensure that this constraint holds to avoid unexpected surprises.
26859 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26860 enforce these sanity checks. */
26863 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26864 enum machine_mode mode, int strict)
26866 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26867 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26868 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26869 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26870 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26871 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26873 gcc_assert (!strict);
26877 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26880 /* ??? This is a lie.  We do have moves between mmx/general and
26881 between mmx/sse2.  But by saying we need secondary memory we discourage the
26882 register allocator from using the mmx registers unless needed. */
26883 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26886 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26888 /* SSE1 doesn't have any direct moves from other classes. */
26892 /* If the target says that inter-unit moves are more expensive
26893 than moving through memory, then don't generate them. */
26894 if (!TARGET_INTER_UNIT_MOVES)
26897 /* Between SSE and general, we have moves no larger than word size. */
26898 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26906 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26907 enum machine_mode mode, int strict)
26909 return inline_secondary_memory_needed (class1, class2, mode, strict);
26912 /* Return true if the registers in CLASS cannot represent the change from
26913 modes FROM to TO. */
26916 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26917 enum reg_class regclass)
26922 /* x87 registers can't do subreg at all, as all values are reformatted
26923 to extended precision. */
26924 if (MAYBE_FLOAT_CLASS_P (regclass))
26927 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26929 /* Vector registers do not support QI or HImode loads. If we don't
26930 disallow a change to these modes, reload will assume it's ok to
26931 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26932 the vec_dupv4hi pattern. */
26933 if (GET_MODE_SIZE (from) < 4)
26936 /* Vector registers do not support subreg with nonzero offsets, which
26937 are otherwise valid for integer registers. Since we can't see
26938 whether we have a nonzero offset from here, prohibit all
26939 nonparadoxical subregs changing size. */
26940 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26947 /* Return the cost of moving data of mode M between a
26948 register and memory. A value of 2 is the default; this cost is
26949 relative to those in `REGISTER_MOVE_COST'.
26951 This function is used extensively by register_move_cost, which is used to
26952 build tables at startup. Make it inline in this case.
26953 When IN is 2, return maximum of in and out move cost.
26955 If moving between registers and memory is more expensive than
26956 between two registers, you should define this macro to express the relative cost.
26959 Model also increased moving costs of QImode registers in non Q_REGS classes.  */
26963 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26967 if (FLOAT_CLASS_P (regclass))
26985 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26986 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26988 if (SSE_CLASS_P (regclass))
26991 switch (GET_MODE_SIZE (mode))
27006 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
27007 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
27009 if (MMX_CLASS_P (regclass))
27012 switch (GET_MODE_SIZE (mode))
27024 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
27025 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
27027 switch (GET_MODE_SIZE (mode))
27030 if (Q_CLASS_P (regclass) || TARGET_64BIT)
27033 return ix86_cost->int_store[0];
27034 if (TARGET_PARTIAL_REG_DEPENDENCY
27035 && optimize_function_for_speed_p (cfun))
27036 cost = ix86_cost->movzbl_load;
27038 cost = ix86_cost->int_load[0];
27040 return MAX (cost, ix86_cost->int_store[0]);
27046 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
27048 return ix86_cost->movzbl_load;
27050 return ix86_cost->int_store[0] + 4;
27055 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
27056 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
27058 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
27059 if (mode == TFmode)
27062 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
27064 cost = ix86_cost->int_load[2];
27066 cost = ix86_cost->int_store[2];
27067 return (cost * (((int) GET_MODE_SIZE (mode)
27068 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
27073 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
27076 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
27080 /* Return the cost of moving data from a register in class CLASS1 to
27081 one in class CLASS2.
27083 It is not required that the cost always equal 2 when FROM is the same as TO;
27084 on some machines it is expensive to move between registers if they are not
27085 general registers. */
27088 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
27089 reg_class_t class2_i)
27091 enum reg_class class1 = (enum reg_class) class1_i;
27092 enum reg_class class2 = (enum reg_class) class2_i;
27094 /* In case we require secondary memory, compute cost of the store followed
27095 by load. In order to avoid bad register allocation choices, we need
27096 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
27098 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27102 cost += inline_memory_move_cost (mode, class1, 2);
27103 cost += inline_memory_move_cost (mode, class2, 2);
27105 /* In the case of copying from a general purpose register we may emit
27106 multiple stores followed by a single load, causing a memory-size-mismatch
27107 stall.  Count this as an arbitrarily high cost of 20.  */
27108 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27111 /* In the case of FP/MMX moves, the registers actually overlap, and we
27112 have to switch modes in order to treat them differently. */
27113 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27114 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27120 /* Moves between SSE/MMX and integer unit are expensive. */
27121 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27122 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27124 /* ??? By keeping the returned value relatively high, we limit the number
27125 of moves between integer and MMX/SSE registers for all targets.
27126 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
27127 where integer modes in MMX/SSE registers are not tieable
27128 because of missing QImode and HImode moves to, from or between
27129 MMX/SSE registers. */
27130 return MAX (8, ix86_cost->mmxsse_to_integer);
27132 if (MAYBE_FLOAT_CLASS_P (class1))
27133 return ix86_cost->fp_move;
27134 if (MAYBE_SSE_CLASS_P (class1))
27135 return ix86_cost->sse_move;
27136 if (MAYBE_MMX_CLASS_P (class1))
27137 return ix86_cost->mmx_move;
27141 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27144 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27146 /* The flags registers, and only they, can hold CCmode values.  */
27147 if (CC_REGNO_P (regno))
27148 return GET_MODE_CLASS (mode) == MODE_CC;
27149 if (GET_MODE_CLASS (mode) == MODE_CC
27150 || GET_MODE_CLASS (mode) == MODE_RANDOM
27151 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27153 if (FP_REGNO_P (regno))
27154 return VALID_FP_MODE_P (mode);
27155 if (SSE_REGNO_P (regno))
27157 /* We implement the move patterns for all vector modes into and
27158 out of SSE registers, even when no operation instructions
27159 are available.  OImode move is available only when AVX is enabled.  */
27161 return ((TARGET_AVX && mode == OImode)
27162 || VALID_AVX256_REG_MODE (mode)
27163 || VALID_SSE_REG_MODE (mode)
27164 || VALID_SSE2_REG_MODE (mode)
27165 || VALID_MMX_REG_MODE (mode)
27166 || VALID_MMX_REG_MODE_3DNOW (mode));
27168 if (MMX_REGNO_P (regno))
27170 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27171 so if the register is available at all, then we can move data of
27172 the given mode into or out of it. */
27173 return (VALID_MMX_REG_MODE (mode)
27174 || VALID_MMX_REG_MODE_3DNOW (mode));
27177 if (mode == QImode)
27179 /* Take care for QImode values - they can be in non-QI regs,
27180 but then they do cause partial register stalls. */
27181 if (regno <= BX_REG || TARGET_64BIT)
27183 if (!TARGET_PARTIAL_REG_STALL)
27185 return reload_in_progress || reload_completed;
27187 /* We handle both integer and floats in the general purpose registers. */
27188 else if (VALID_INT_MODE_P (mode))
27190 else if (VALID_FP_MODE_P (mode))
27192 else if (VALID_DFP_MODE_P (mode))
27194 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27195 on to use that value in smaller contexts, this can easily force a
27196 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27197 supporting DImode, allow it. */
27198 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27204 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27205 tieable integer mode. */
27208 ix86_tieable_integer_mode_p (enum machine_mode mode)
27217 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27220 return TARGET_64BIT;
27227 /* Return true if MODE1 is accessible in a register that can hold MODE2
27228 without copying. That is, all register classes that can hold MODE2
27229 can also hold MODE1. */
27232 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27234 if (mode1 == mode2)
27237 if (ix86_tieable_integer_mode_p (mode1)
27238 && ix86_tieable_integer_mode_p (mode2))
27241 /* MODE2 being XFmode implies fp stack or general regs, which means we
27242 can tie any smaller floating point modes to it. Note that we do not
27243 tie this with TFmode. */
27244 if (mode2 == XFmode)
27245 return mode1 == SFmode || mode1 == DFmode;
27247 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27248 that we can tie it with SFmode. */
27249 if (mode2 == DFmode)
27250 return mode1 == SFmode;
27252 /* If MODE2 is only appropriate for an SSE register, then tie with
27253 any other mode acceptable to SSE registers. */
27254 if (GET_MODE_SIZE (mode2) == 16
27255 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27256 return (GET_MODE_SIZE (mode1) == 16
27257 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27259 /* If MODE2 is appropriate for an MMX register, then tie
27260 with any other mode acceptable to MMX registers. */
27261 if (GET_MODE_SIZE (mode2) == 8
27262 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27263 return (GET_MODE_SIZE (mode1) == 8
27264 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
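/* Hedged examples of the rules above: SFmode ties with DFmode; V4SF
   ties with V2DI (both 16-byte modes valid in SSE registers); but
   SImode (4 bytes) does not tie with V8QI (8 bytes, an MMX mode),
   since the size tests above fail.  */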
27269 /* Compute a (partial) cost for rtx X. Return true if the complete
27270 cost has been computed, and false if subexpressions should be
27271 scanned. In either case, *TOTAL contains the cost result. */
27274 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27276 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27277 enum machine_mode mode = GET_MODE (x);
27278 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27286 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27288 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
27290 else if (flag_pic && SYMBOLIC_CONST (x)
27292 || (GET_CODE (x) != LABEL_REF
27293 && (GET_CODE (x) != SYMBOL_REF
27294 || !SYMBOL_REF_LOCAL_P (x)))))
27301 if (mode == VOIDmode)
27304 switch (standard_80387_constant_p (x))
27309 default: /* Other constants */
27314 /* Start with (MEM (SYMBOL_REF)), since that's where
27315 it'll probably end up. Add a penalty for size. */
27316 *total = (COSTS_N_INSNS (1)
27317 + (flag_pic != 0 && !TARGET_64BIT)
27318 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
27324 /* The zero extension is often completely free on x86_64, so make
27325 it as cheap as possible. */
27326 if (TARGET_64BIT && mode == DImode
27327 && GET_MODE (XEXP (x, 0)) == SImode)
27329 else if (TARGET_ZERO_EXTEND_WITH_AND)
27330 *total = cost->add;
27332 *total = cost->movzx;
27336 *total = cost->movsx;
27340 if (CONST_INT_P (XEXP (x, 1))
27341 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27343 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27346 *total = cost->add;
27349 if ((value == 2 || value == 3)
27350 && cost->lea <= cost->shift_const)
27352 *total = cost->lea;
27362 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27364 if (CONST_INT_P (XEXP (x, 1)))
27366 if (INTVAL (XEXP (x, 1)) > 32)
27367 *total = cost->shift_const + COSTS_N_INSNS (2);
27369 *total = cost->shift_const * 2;
27373 if (GET_CODE (XEXP (x, 1)) == AND)
27374 *total = cost->shift_var * 2;
27376 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27381 if (CONST_INT_P (XEXP (x, 1)))
27382 *total = cost->shift_const;
27384 *total = cost->shift_var;
27389 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27391 /* ??? SSE scalar cost should be used here. */
27392 *total = cost->fmul;
27395 else if (X87_FLOAT_MODE_P (mode))
27397 *total = cost->fmul;
27400 else if (FLOAT_MODE_P (mode))
27402 /* ??? SSE vector cost should be used here. */
27403 *total = cost->fmul;
27408 rtx op0 = XEXP (x, 0);
27409 rtx op1 = XEXP (x, 1);
27411 if (CONST_INT_P (XEXP (x, 1)))
27413 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27414 for (nbits = 0; value != 0; value &= value - 1)
27418 /* This is arbitrary. */
27421 /* Compute costs correctly for widening multiplication. */
27422 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27423 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27424 == GET_MODE_SIZE (mode))
27426 int is_mulwiden = 0;
27427 enum machine_mode inner_mode = GET_MODE (op0);
27429 if (GET_CODE (op0) == GET_CODE (op1))
27430 is_mulwiden = 1, op1 = XEXP (op1, 0);
27431 else if (CONST_INT_P (op1))
27433 if (GET_CODE (op0) == SIGN_EXTEND)
27434 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27437 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27441 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27444 *total = (cost->mult_init[MODE_INDEX (mode)]
27445 + nbits * cost->mult_bit
27446 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27455 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27456 /* ??? SSE cost should be used here. */
27457 *total = cost->fdiv;
27458 else if (X87_FLOAT_MODE_P (mode))
27459 *total = cost->fdiv;
27460 else if (FLOAT_MODE_P (mode))
27461 /* ??? SSE vector cost should be used here. */
27462 *total = cost->fdiv;
27464 *total = cost->divide[MODE_INDEX (mode)];
27468 if (GET_MODE_CLASS (mode) == MODE_INT
27469 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27471 if (GET_CODE (XEXP (x, 0)) == PLUS
27472 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27473 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27474 && CONSTANT_P (XEXP (x, 1)))
27476 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27477 if (val == 2 || val == 4 || val == 8)
27479 *total = cost->lea;
27480 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27481 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27482 outer_code, speed);
27483 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27487 else if (GET_CODE (XEXP (x, 0)) == MULT
27488 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27490 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27491 if (val == 2 || val == 4 || val == 8)
27493 *total = cost->lea;
27494 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27495 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27499 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27501 *total = cost->lea;
27502 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27503 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27504 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27511 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27513 /* ??? SSE cost should be used here. */
27514 *total = cost->fadd;
27517 else if (X87_FLOAT_MODE_P (mode))
27519 *total = cost->fadd;
27522 else if (FLOAT_MODE_P (mode))
27524 /* ??? SSE vector cost should be used here. */
27525 *total = cost->fadd;
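/* A double-word add or subtract on a 32-bit target is an add/adc
(resp. sub/sbb) pair, hence twice the add cost plus the cost of
the two double-word operands below. */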
27533 if (!TARGET_64BIT && mode == DImode)
27535 *total = (cost->add * 2
27536 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27537 << (GET_MODE (XEXP (x, 0)) != DImode))
27538 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27539 << (GET_MODE (XEXP (x, 1)) != DImode)));
27545 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27547 /* ??? SSE cost should be used here. */
27548 *total = cost->fchs;
27551 else if (X87_FLOAT_MODE_P (mode))
27553 *total = cost->fchs;
27556 else if (FLOAT_MODE_P (mode))
27558 /* ??? SSE vector cost should be used here. */
27559 *total = cost->fchs;
27565 if (!TARGET_64BIT && mode == DImode)
27566 *total = cost->add * 2;
27567 else
27568 *total = cost->add;
27572 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27573 && XEXP (XEXP (x, 0), 1) == const1_rtx
27574 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27575 && XEXP (x, 1) == const0_rtx)
27577 /* This kind of construct is implemented using test[bwl].
27578 Treat it as if we had an AND. */
27579 *total = (cost->add
27580 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27581 + rtx_cost (const1_rtx, outer_code, speed));
27587 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27588 *total = 0;
27592 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27593 /* ??? SSE cost should be used here. */
27594 *total = cost->fabs;
27595 else if (X87_FLOAT_MODE_P (mode))
27596 *total = cost->fabs;
27597 else if (FLOAT_MODE_P (mode))
27598 /* ??? SSE vector cost should be used here. */
27599 *total = cost->fabs;
27603 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27604 /* ??? SSE cost should be used here. */
27605 *total = cost->fsqrt;
27606 else if (X87_FLOAT_MODE_P (mode))
27607 *total = cost->fsqrt;
27608 else if (FLOAT_MODE_P (mode))
27609 /* ??? SSE vector cost should be used here. */
27610 *total = cost->fsqrt;
27614 if (XINT (x, 1) == UNSPEC_TP)
27615 *total = 0;
27621 case VEC_DUPLICATE:
27622 /* ??? Assume all of these vector manipulation patterns are
27623 recognizable. In which case they all pretty much have the
27624 same cost. */
27625 *total = COSTS_N_INSNS (1);
27635 static int current_machopic_label_num;
27637 /* Given a symbol name and its associated stub, write out the
27638 definition of the stub. */
27641 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27643 unsigned int length;
27644 char *binder_name, *symbol_name, lazy_ptr_name[32];
27645 int label = ++current_machopic_label_num;
27647 /* For 64-bit we shouldn't get here. */
27648 gcc_assert (!TARGET_64BIT);
27650 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27651 symb = targetm.strip_name_encoding (symb);
27653 length = strlen (stub);
27654 binder_name = XALLOCAVEC (char, length + 32);
27655 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27657 length = strlen (symb);
27658 symbol_name = XALLOCAVEC (char, length + 32);
27659 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27661 sprintf (lazy_ptr_name, "L%d$lz", label);
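/* Both stub flavours jump through the lazy pointer; the PIC one first
materializes its own address with a call/popl pair so the pointer
can be addressed PC-relatively. */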
27663 if (MACHOPIC_PURE)
27664 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27665 else
27666 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27668 fprintf (file, "%s:\n", stub);
27669 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27673 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27674 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27675 fprintf (file, "\tjmp\t*%%edx\n");
27678 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27680 fprintf (file, "%s:\n", binder_name);
27684 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27685 fputs ("\tpushl\t%eax\n", file);
27688 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27690 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27692 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27693 fprintf (file, "%s:\n", lazy_ptr_name);
27694 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27695 fprintf (file, ASM_LONG "%s\n", binder_name);
27697 #endif /* TARGET_MACHO */
27699 /* Order the registers for register allocator. */
27702 x86_order_regs_for_local_alloc (void)
27707 /* First allocate the local general purpose registers. */
27708 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27709 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27710 reg_alloc_order [pos++] = i;
27712 /* Global general purpose registers. */
27713 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27714 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27715 reg_alloc_order [pos++] = i;
27717 /* x87 registers come first in case we are doing FP math
27718 using them. */
27719 if (!TARGET_SSE_MATH)
27720 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27721 reg_alloc_order [pos++] = i;
27723 /* SSE registers. */
27724 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27725 reg_alloc_order [pos++] = i;
27726 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27727 reg_alloc_order [pos++] = i;
27729 /* x87 registers. */
27730 if (TARGET_SSE_MATH)
27731 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27732 reg_alloc_order [pos++] = i;
27734 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27735 reg_alloc_order [pos++] = i;
27737 /* Initialize the rest of array as we do not allocate some registers
27738 at all. */
27739 while (pos < FIRST_PSEUDO_REGISTER)
27740 reg_alloc_order [pos++] = 0;
27743 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27744 struct attribute_spec.handler. */
27746 ix86_handle_abi_attribute (tree *node, tree name,
27747 tree args ATTRIBUTE_UNUSED,
27748 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27750 if (TREE_CODE (*node) != FUNCTION_TYPE
27751 && TREE_CODE (*node) != METHOD_TYPE
27752 && TREE_CODE (*node) != FIELD_DECL
27753 && TREE_CODE (*node) != TYPE_DECL)
27755 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27757 *no_add_attrs = true;
27762 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27764 *no_add_attrs = true;
27768 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27769 if (is_attribute_p ("ms_abi", name))
27771 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27773 error ("ms_abi and sysv_abi attributes are not compatible");
27778 else if (is_attribute_p ("sysv_abi", name))
27780 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27782 error ("ms_abi and sysv_abi attributes are not compatible");
27791 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27792 struct attribute_spec.handler. */
27794 ix86_handle_struct_attribute (tree *node, tree name,
27795 tree args ATTRIBUTE_UNUSED,
27796 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27799 if (DECL_P (*node))
27801 if (TREE_CODE (*node) == TYPE_DECL)
27802 type = &TREE_TYPE (*node);
27807 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27808 || TREE_CODE (*type) == UNION_TYPE)))
27810 warning (OPT_Wattributes, "%qE attribute ignored",
27812 *no_add_attrs = true;
27815 else if ((is_attribute_p ("ms_struct", name)
27816 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27817 || ((is_attribute_p ("gcc_struct", name)
27818 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27820 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27822 *no_add_attrs = true;
27829 ix86_handle_fndecl_attribute (tree *node, tree name,
27830 tree args ATTRIBUTE_UNUSED,
27831 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27833 if (TREE_CODE (*node) != FUNCTION_DECL)
27835 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27837 *no_add_attrs = true;
27843 ix86_ms_bitfield_layout_p (const_tree record_type)
27845 return ((TARGET_MS_BITFIELD_LAYOUT
27846 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27847 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27850 /* Returns an expression indicating where the this parameter is
27851 located on entry to the FUNCTION. */
27854 x86_this_parameter (tree function)
27856 tree type = TREE_TYPE (function);
27857 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27862 const int *parm_regs;
27864 if (ix86_function_type_abi (type) == MS_ABI)
27865 parm_regs = x86_64_ms_abi_int_parameter_registers;
27867 parm_regs = x86_64_int_parameter_registers;
27868 return gen_rtx_REG (DImode, parm_regs[aggr]);
27871 nregs = ix86_function_regparm (type, function);
27873 if (nregs > 0 && !stdarg_p (type))
27877 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27878 regno = aggr ? DX_REG : CX_REG;
27879 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27883 return gen_rtx_MEM (SImode,
27884 plus_constant (stack_pointer_rtx, 4));
27893 return gen_rtx_MEM (SImode,
27894 plus_constant (stack_pointer_rtx, 4));
27897 return gen_rtx_REG (SImode, regno);
27900 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27903 /* Determine whether x86_output_mi_thunk can succeed. */
27906 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27907 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27908 HOST_WIDE_INT vcall_offset, const_tree function)
27910 /* 64-bit can handle anything. */
27914 /* For 32-bit, everything's fine if we have one free register. */
27915 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27918 /* Need a free register for vcall_offset. */
27922 /* Need a free register for GOT references. */
27923 if (flag_pic && !targetm.binds_local_p (function))
27926 /* Otherwise ok. */
27930 /* Output the assembler code for a thunk function. THUNK_DECL is the
27931 declaration for the thunk function itself, FUNCTION is the decl for
27932 the target function. DELTA is an immediate constant offset to be
27933 added to THIS. If VCALL_OFFSET is nonzero, the word at
27934 *(*this + vcall_offset) should be added to THIS. */
27937 x86_output_mi_thunk (FILE *file,
27938 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27939 HOST_WIDE_INT vcall_offset, tree function)
27942 rtx this_param = x86_this_parameter (function);
27945 /* Make sure unwind info is emitted for the thunk if needed. */
27946 final_start_function (emit_barrier (), file, 1);
27948 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27949 pull it in now and let DELTA benefit. */
27950 if (REG_P (this_param))
27951 this_reg = this_param;
27952 else if (vcall_offset)
27954 /* Put the this parameter into %eax. */
27955 xops[0] = this_param;
27956 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27957 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27960 this_reg = NULL_RTX;
27962 /* Adjust the this parameter by a fixed constant. */
27965 xops[0] = GEN_INT (delta);
27966 xops[1] = this_reg ? this_reg : this_param;
27969 if (!x86_64_general_operand (xops[0], DImode))
27971 tmp = gen_rtx_REG (DImode, R10_REG);
27973 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27975 xops[1] = this_param;
27977 if (x86_maybe_negate_const_int (&xops[0], DImode))
27978 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27980 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27982 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27983 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27985 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27988 /* Adjust the this parameter by a value stored in the vtable. */
27992 tmp = gen_rtx_REG (DImode, R10_REG);
27995 int tmp_regno = CX_REG;
27996 if (lookup_attribute ("fastcall",
27997 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27998 || lookup_attribute ("thiscall",
27999 TYPE_ATTRIBUTES (TREE_TYPE (function))))
28000 tmp_regno = AX_REG;
28001 tmp = gen_rtx_REG (SImode, tmp_regno);
28004 xops[0] = gen_rtx_MEM (Pmode, this_reg);
28006 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28008 /* Adjust the this parameter. */
28009 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
28010 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
28012 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
28013 xops[0] = GEN_INT (vcall_offset);
28015 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
28016 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
28018 xops[1] = this_reg;
28019 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
28022 /* If necessary, drop THIS back to its stack slot. */
28023 if (this_reg && this_reg != this_param)
28025 xops[0] = this_reg;
28026 xops[1] = this_param;
28027 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28030 xops[0] = XEXP (DECL_RTL (function), 0);
28033 if (!flag_pic || targetm.binds_local_p (function))
28034 output_asm_insn ("jmp\t%P0", xops);
28035 /* All thunks should be in the same object as their target,
28036 and thus binds_local_p should be true. */
28037 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
28038 gcc_unreachable ();
28041 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
28042 tmp = gen_rtx_CONST (Pmode, tmp);
28043 tmp = gen_rtx_MEM (QImode, tmp);
28045 output_asm_insn ("jmp\t%A0", xops);
28050 if (!flag_pic || targetm.binds_local_p (function))
28051 output_asm_insn ("jmp\t%P0", xops);
28056 rtx sym_ref = XEXP (DECL_RTL (function), 0);
28057 if (TARGET_MACHO_BRANCH_ISLANDS)
28058 sym_ref = (gen_rtx_SYMBOL_REF
28060 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
28061 tmp = gen_rtx_MEM (QImode, sym_ref);
28063 output_asm_insn ("jmp\t%0", xops);
28066 #endif /* TARGET_MACHO */
28068 tmp = gen_rtx_REG (SImode, CX_REG);
28069 output_set_got (tmp, NULL_RTX);
28072 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
28073 output_asm_insn ("jmp\t{*}%1", xops);
28076 final_end_function ();
28080 x86_file_start (void)
28082 default_file_start ();
28084 darwin_file_start ();
28086 if (X86_FILE_START_VERSION_DIRECTIVE)
28087 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28088 if (X86_FILE_START_FLTUSED)
28089 fputs ("\t.global\t__fltused\n", asm_out_file);
28090 if (ix86_asm_dialect == ASM_INTEL)
28091 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28095 x86_field_alignment (tree field, int computed)
28097 enum machine_mode mode;
28098 tree type = TREE_TYPE (field);
28100 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28101 return computed;
28102 mode = TYPE_MODE (strip_array_types (type));
28103 if (mode == DFmode || mode == DCmode
28104 || GET_MODE_CLASS (mode) == MODE_INT
28105 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28106 return MIN (32, computed);
28110 /* Output assembler code to FILE to increment profiler label # LABELNO
28111 for profiling a function entry. */
28113 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28115 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28120 #ifndef NO_PROFILE_COUNTERS
28121 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28124 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28125 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28127 fprintf (file, "\tcall\t%s\n", mcount_name);
28131 #ifndef NO_PROFILE_COUNTERS
28132 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28135 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28139 #ifndef NO_PROFILE_COUNTERS
28140 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28143 fprintf (file, "\tcall\t%s\n", mcount_name);
28147 /* We don't have exact information about the insn sizes, but we may assume
28148 quite safely that we are informed about all 1 byte insns and memory
28149 address sizes. This is enough to eliminate unnecessary padding in
28150 99% of cases. */
28153 min_insn_size (rtx insn)
28157 if (!INSN_P (insn) || !active_insn_p (insn))
28158 return 0;
28160 /* Discard alignments we've emitted and jump instructions. */
28161 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28162 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28163 return 0;
28164 if (JUMP_TABLE_DATA_P (insn))
28165 return 0;
28167 /* Important case - calls are always 5 bytes.
28168 It is common to have many calls in a row. */
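/* A direct near call is opcode 0xE8 followed by a rel32 displacement,
hence the fixed size of 5 bytes. */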
28170 && symbolic_reference_mentioned_p (PATTERN (insn))
28171 && !SIBLING_CALL_P (insn))
28172 return 5;
28173 len = get_attr_length (insn);
28177 /* For normal instructions we rely on get_attr_length being exact,
28178 with a few exceptions. */
28179 if (!JUMP_P (insn))
28181 enum attr_type type = get_attr_type (insn);
28186 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28187 || asm_noperands (PATTERN (insn)) >= 0)
28194 /* Otherwise trust get_attr_length. */
28198 l = get_attr_length_address (insn);
28199 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28208 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28210 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
28211 window. */
28214 ix86_avoid_jump_mispredicts (void)
28216 rtx insn, start = get_insns ();
28217 int nbytes = 0, njumps = 0;
28220 /* Look for all minimal intervals of instructions containing 4 jumps.
28221 The intervals are bounded by START and INSN. NBYTES is the total
28222 size of instructions in the interval including INSN and not including
28223 START. When NBYTES is smaller than 16 bytes, it is possible
28224 that the end of START and INSN ends up in the same 16 byte page.
28226 The smallest offset in the page at which INSN can start is the case where START
28227 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
28228 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
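/* E.g. if the interval has accumulated NBYTES == 12 and INSN is 2 bytes
long, the pad below asks for 15 - 12 + 2 = 5 bytes of fill, enough to
force INSN out of any 16 byte window containing the other jumps. */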
28230 for (insn = start; insn; insn = NEXT_INSN (insn))
28234 if (LABEL_P (insn))
28236 int align = label_to_alignment (insn);
28237 int max_skip = label_to_max_skip (insn);
28241 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
28242 already in the current 16 byte page, because otherwise
28243 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28244 bytes to reach 16 byte boundary. */
28245 if (align <= 0
28246 || (align <= 3 && max_skip != (1 << align) - 1))
28247 max_skip = 0;
28248 if (dump_file)
28249 fprintf (dump_file, "Label %i with max_skip %i\n",
28250 INSN_UID (insn), max_skip);
28253 while (nbytes + max_skip >= 16)
28255 start = NEXT_INSN (start);
28256 if ((JUMP_P (start)
28257 && GET_CODE (PATTERN (start)) != ADDR_VEC
28258 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28259 || CALL_P (start))
28260 njumps--, isjump = 1;
28261 else
28262 isjump = 0;
28263 nbytes -= min_insn_size (start);
28269 min_size = min_insn_size (insn);
28270 nbytes += min_size;
28272 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28273 INSN_UID (insn), min_size);
28274 if ((JUMP_P (insn)
28275 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28276 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28277 || CALL_P (insn))
28278 njumps++;
28279 else
28280 continue;
28282 while (njumps > 3)
28284 start = NEXT_INSN (start);
28285 if ((JUMP_P (start)
28286 && GET_CODE (PATTERN (start)) != ADDR_VEC
28287 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28288 || CALL_P (start))
28289 njumps--, isjump = 1;
28290 else
28291 isjump = 0;
28292 nbytes -= min_insn_size (start);
28294 gcc_assert (njumps >= 0);
28296 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28297 INSN_UID (start), INSN_UID (insn), nbytes);
28299 if (njumps == 3 && isjump && nbytes < 16)
28301 int padsize = 15 - nbytes + min_insn_size (insn);
28304 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28305 INSN_UID (insn), padsize);
28306 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
28312 /* AMD Athlon works faster
28313 when RET is not the destination of a conditional jump or directly preceded
28314 by another jump instruction. We avoid the penalty by inserting a NOP just
28315 before the RET instructions in such cases. */
28317 ix86_pad_returns (void)
28322 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28324 basic_block bb = e->src;
28325 rtx ret = BB_END (bb);
28327 bool replace = false;
28329 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28330 || optimize_bb_for_size_p (bb))
28331 continue;
28332 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28333 if (active_insn_p (prev) || LABEL_P (prev))
28334 break;
28335 if (prev && LABEL_P (prev))
28340 FOR_EACH_EDGE (e, ei, bb->preds)
28341 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28342 && !(e->flags & EDGE_FALLTHRU))
28343 replace = true;
28347 prev = prev_active_insn (ret);
28348 if (prev
28349 && ((JUMP_P (prev) && any_condjump_p (prev))
28350 || CALL_P (prev)))
28351 replace = true;
28352 /* Empty functions get a branch mispredict even when the jump destination
28353 is not visible to us. */
28354 if (!prev && !optimize_function_for_size_p (cfun))
28355 replace = true;
28357 if (replace)
28359 emit_jump_insn_before (gen_return_internal_long (), ret);
28365 /* Count the minimum number of instructions in BB. Return 4 if the
28366 number of instructions >= 4. */
28369 ix86_count_insn_bb (basic_block bb)
28372 int insn_count = 0;
28374 /* Count number of instructions in this block. Return 4 if the number
28375 of instructions >= 4. */
28376 FOR_BB_INSNS (bb, insn)
28378 /* This only happens in exit blocks. */
28379 if (JUMP_P (insn)
28380 && GET_CODE (PATTERN (insn)) == RETURN)
28381 break;
28383 if (NONDEBUG_INSN_P (insn)
28384 && GET_CODE (PATTERN (insn)) != USE
28385 && GET_CODE (PATTERN (insn)) != CLOBBER)
28387 insn_count++;
28388 if (insn_count >= 4)
28389 return insn_count;
28397 /* Count the minimum number of instructions in code path in BB.
28398 Return 4 if the number of instructions >= 4. */
28401 ix86_count_insn (basic_block bb)
28405 int min_prev_count;
28407 /* Only bother counting instructions along paths with no
28408 more than 2 basic blocks between entry and exit. Given
28409 that BB has an edge to exit, determine if a predecessor
28410 of BB has an edge from entry. If so, compute the number
28411 of instructions in the predecessor block. If there
28412 happen to be multiple such blocks, compute the minimum. */
28413 min_prev_count = 4;
28414 FOR_EACH_EDGE (e, ei, bb->preds)
28417 edge_iterator prev_ei;
28419 if (e->src == ENTRY_BLOCK_PTR)
28421 min_prev_count = 0;
28422 break;
28424 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28426 if (prev_e->src == ENTRY_BLOCK_PTR)
28428 int count = ix86_count_insn_bb (e->src);
28429 if (count < min_prev_count)
28430 min_prev_count = count;
28436 if (min_prev_count < 4)
28437 min_prev_count += ix86_count_insn_bb (bb);
28439 return min_prev_count;
28442 /* Pad short function to 4 instructions. */
28445 ix86_pad_short_function (void)
28450 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28452 rtx ret = BB_END (e->src);
28453 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28455 int insn_count = ix86_count_insn (e->src);
28457 /* Pad short function. */
28458 if (insn_count < 4)
28462 /* Find epilogue. */
28463 while (insn
28464 && (!NOTE_P (insn)
28465 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28466 insn = PREV_INSN (insn);
28471 /* Two NOPs are counted as one instruction. */
28472 insn_count = 2 * (4 - insn_count);
28473 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
28479 /* Implement machine specific optimizations. We implement padding of returns
28480 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
28484 if (optimize && optimize_function_for_speed_p (cfun))
28486 if (TARGET_PAD_SHORT_FUNCTION)
28487 ix86_pad_short_function ();
28488 else if (TARGET_PAD_RETURNS)
28489 ix86_pad_returns ();
28490 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28491 if (TARGET_FOUR_JUMP_LIMIT)
28492 ix86_avoid_jump_mispredicts ();
28497 /* Return nonzero when QImode register that must be represented via REX prefix
28498 is used. */
28500 x86_extended_QIreg_mentioned_p (rtx insn)
28503 extract_insn_cached (insn);
28504 for (i = 0; i < recog_data.n_operands; i++)
28505 if (REG_P (recog_data.operand[i])
28506 && REGNO (recog_data.operand[i]) > BX_REG)
28507 return true;
28508 return false;
28511 /* Return nonzero when P points to register encoded via REX prefix.
28512 Called via for_each_rtx. */
28514 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28516 unsigned int regno;
28517 if (!REG_P (*p))
28518 return 0;
28519 regno = REGNO (*p);
28520 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28523 /* Return true when INSN mentions register that must be encoded using REX
28524 prefix. */
28526 x86_extended_reg_mentioned_p (rtx insn)
28528 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28529 extended_reg_mentioned_1, NULL);
28532 /* If profitable, negate (without causing overflow) integer constant
28533 of mode MODE at location LOC. Return true in this case. */
28535 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28539 if (!CONST_INT_P (*loc))
28540 return false;
28545 /* DImode x86_64 constants must fit in 32 bits. */
28546 gcc_assert (x86_64_immediate_operand (*loc, mode));
28557 gcc_unreachable ();
28560 /* Avoid overflows. */
28561 if (mode_signbit_p (mode, *loc))
28562 return false;
28564 val = INTVAL (*loc);
28566 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
28567 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
28568 if ((val < 0 && val != -128)
28569 || val == 128)
28571 *loc = GEN_INT (-val);
28578 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28579 optabs would emit if we didn't have TFmode patterns. */
28582 x86_emit_floatuns (rtx operands[2])
28584 rtx neglab, donelab, i0, i1, f0, in, out;
28585 enum machine_mode mode, inmode;
28587 inmode = GET_MODE (operands[1]);
28588 gcc_assert (inmode == SImode || inmode == DImode);
28590 out = operands[0];
28591 in = force_reg (inmode, operands[1]);
28592 mode = GET_MODE (out);
28593 neglab = gen_label_rtx ();
28594 donelab = gen_label_rtx ();
28595 f0 = gen_reg_rtx (mode);
28597 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28599 expand_float (out, in, 0);
28601 emit_jump_insn (gen_jump (donelab));
28604 emit_label (neglab);
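/* The input has its sign bit set here, so compute (in >> 1) | (in & 1),
convert that to FP and double the result; OR-ing the low bit back in
keeps the final rounding correct. */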
28606 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28607 1, OPTAB_DIRECT);
28608 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28609 1, OPTAB_DIRECT);
28610 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28612 expand_float (f0, i0, 0);
28614 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28616 emit_label (donelab);
28619 /* AVX does not support 32-byte integer vector operations,
28620 thus the longest vector we are faced with is V16QImode. */
28621 #define MAX_VECT_LEN 16
28623 struct expand_vec_perm_d
28625 rtx target, op0, op1;
28626 unsigned char perm[MAX_VECT_LEN];
28627 enum machine_mode vmode;
28628 unsigned char nelt;
28629 bool testing_p;
28632 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28633 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28635 /* Get a vector mode of the same size as the original but with elements
28636 twice as wide. This is only guaranteed to apply to integral vectors. */
28638 static inline enum machine_mode
28639 get_mode_wider_vector (enum machine_mode o)
28641 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28642 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28643 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28644 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28645 return n;
28648 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28649 with all elements equal to VAR. Return true if successful. */
28652 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28653 rtx target, rtx val)
28676 /* First attempt to recognize VAL as-is. */
28677 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28678 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28679 if (recog_memoized (insn) < 0)
28682 /* If that fails, force VAL into a register. */
28684 start_sequence ();
28685 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28686 seq = get_insns ();
28687 end_sequence ();
28688 if (seq)
28689 emit_insn_before (seq, insn);
28691 ok = recog_memoized (insn) >= 0;
28700 if (TARGET_SSE || TARGET_3DNOW_A)
28704 val = gen_lowpart (SImode, val);
28705 x = gen_rtx_TRUNCATE (HImode, val);
28706 x = gen_rtx_VEC_DUPLICATE (mode, x);
28707 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28720 struct expand_vec_perm_d dperm;
28724 memset (&dperm, 0, sizeof (dperm));
28725 dperm.target = target;
28726 dperm.vmode = mode;
28727 dperm.nelt = GET_MODE_NUNITS (mode);
28728 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28730 /* Extend to SImode using a paradoxical SUBREG. */
28731 tmp1 = gen_reg_rtx (SImode);
28732 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28734 /* Insert the SImode value as low element of a V4SImode vector. */
28735 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28736 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28738 ok = (expand_vec_perm_1 (&dperm)
28739 || expand_vec_perm_broadcast_1 (&dperm));
28751 /* Replicate the value once into the next wider mode and recurse. */
28753 enum machine_mode smode, wsmode, wvmode;
28756 smode = GET_MODE_INNER (mode);
28757 wvmode = get_mode_wider_vector (mode);
28758 wsmode = GET_MODE_INNER (wvmode);
28760 val = convert_modes (wsmode, smode, val, true);
28761 x = expand_simple_binop (wsmode, ASHIFT, val,
28762 GEN_INT (GET_MODE_BITSIZE (smode)),
28763 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28764 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
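/* E.g. a QImode value 0xab becomes the HImode value 0xabab; repeating
this eventually fills a whole vector element with copies of VAL. */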
28766 x = gen_lowpart (wvmode, target);
28767 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28775 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28776 rtx x = gen_reg_rtx (hvmode);
28778 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28781 x = gen_rtx_VEC_CONCAT (mode, x, x);
28782 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28791 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28792 whose ONE_VAR element is VAR, and other elements are zero. Return true
28793 if successful. */
28796 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28797 rtx target, rtx var, int one_var)
28799 enum machine_mode vsimode;
28802 bool use_vector_set = false;
28807 /* For SSE4.1, we normally use vector set. But if the second
28808 element is zero and inter-unit moves are OK, we use movq
28809 instead. */
28810 use_vector_set = (TARGET_64BIT
28811 && TARGET_SSE4_1
28812 && !(TARGET_INTER_UNIT_MOVES
28813 && one_var == 0));
28818 use_vector_set = TARGET_SSE4_1;
28821 use_vector_set = TARGET_SSE2;
28824 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28831 use_vector_set = TARGET_AVX;
28834 /* Use ix86_expand_vector_set in 64bit mode only. */
28835 use_vector_set = TARGET_AVX && TARGET_64BIT;
28841 if (use_vector_set)
28843 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28844 var = force_reg (GET_MODE_INNER (mode), var);
28845 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28861 var = force_reg (GET_MODE_INNER (mode), var);
28862 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28863 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28868 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28869 new_target = gen_reg_rtx (mode);
28871 new_target = target;
28872 var = force_reg (GET_MODE_INNER (mode), var);
28873 x = gen_rtx_VEC_DUPLICATE (mode, var);
28874 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28875 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28878 /* We need to shuffle the value to the correct position, so
28879 create a new pseudo to store the intermediate result. */
28881 /* With SSE2, we can use the integer shuffle insns. */
28882 if (mode != V4SFmode && TARGET_SSE2)
28884 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28886 GEN_INT (one_var == 1 ? 0 : 1),
28887 GEN_INT (one_var == 2 ? 0 : 1),
28888 GEN_INT (one_var == 3 ? 0 : 1)));
28889 if (target != new_target)
28890 emit_move_insn (target, new_target);
28894 /* Otherwise convert the intermediate result to V4SFmode and
28895 use the SSE1 shuffle instructions. */
28896 if (mode != V4SFmode)
28898 tmp = gen_reg_rtx (V4SFmode);
28899 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28904 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28906 GEN_INT (one_var == 1 ? 0 : 1),
28907 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28908 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28910 if (mode != V4SFmode)
28911 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28912 else if (tmp != target)
28913 emit_move_insn (target, tmp);
28915 else if (target != new_target)
28916 emit_move_insn (target, new_target);
28921 vsimode = V4SImode;
28927 vsimode = V2SImode;
28933 /* Zero extend the variable element to SImode and recurse. */
28934 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28936 x = gen_reg_rtx (vsimode);
28937 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28939 gcc_unreachable ();
28941 emit_move_insn (target, gen_lowpart (mode, x));
28949 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28950 consisting of the values in VALS. It is known that all elements
28951 except ONE_VAR are constants. Return true if successful. */
28954 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28955 rtx target, rtx vals, int one_var)
28957 rtx var = XVECEXP (vals, 0, one_var);
28958 enum machine_mode wmode;
28961 const_vec = copy_rtx (vals);
28962 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28963 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28971 /* For the two element vectors, it's just as easy to use
28972 the general case. */
28976 /* Use ix86_expand_vector_set in 64bit mode only. */
28999 /* There's no way to set one QImode entry easily. Combine
29000 the variable value with its adjacent constant value, and
29001 promote to an HImode set. */
29002 x = XVECEXP (vals, 0, one_var ^ 1);
29003 if (one_var & 1)
29005 var = convert_modes (HImode, QImode, var, true);
29006 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
29007 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29008 x = GEN_INT (INTVAL (x) & 0xff);
29010 else
29012 var = convert_modes (HImode, QImode, var, true);
29013 x = gen_int_mode (INTVAL (x) << 8, HImode);
29015 if (x != const0_rtx)
29016 var = expand_simple_binop (HImode, IOR, var, x, var,
29017 1, OPTAB_LIB_WIDEN);
29019 x = gen_reg_rtx (wmode);
29020 emit_move_insn (x, gen_lowpart (wmode, const_vec));
29021 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
29023 emit_move_insn (target, gen_lowpart (mode, x));
29030 emit_move_insn (target, const_vec);
29031 ix86_expand_vector_set (mmx_ok, target, var, one_var);
29035 /* A subroutine of ix86_expand_vector_init_general. Use vector
29036 concatenate to handle the most general case: all values variable,
29037 and none identical. */
29040 ix86_expand_vector_init_concat (enum machine_mode mode,
29041 rtx target, rtx *ops, int n)
29043 enum machine_mode cmode, hmode = VOIDmode;
29044 rtx first[8], second[4];
29084 gcc_unreachable ();
29087 if (!register_operand (ops[1], cmode))
29088 ops[1] = force_reg (cmode, ops[1]);
29089 if (!register_operand (ops[0], cmode))
29090 ops[0] = force_reg (cmode, ops[0]);
29091 emit_insn (gen_rtx_SET (VOIDmode, target,
29092 gen_rtx_VEC_CONCAT (mode, ops[0],
29112 gcc_unreachable ();
29128 gcc_unreachable ();
29133 /* FIXME: We process inputs backward to help RA. PR 36222. */
29136 for (; i > 0; i -= 2, j--)
29138 first[j] = gen_reg_rtx (cmode);
29139 v = gen_rtvec (2, ops[i - 1], ops[i]);
29140 ix86_expand_vector_init (false, first[j],
29141 gen_rtx_PARALLEL (cmode, v));
29147 gcc_assert (hmode != VOIDmode);
29148 for (i = j = 0; i < n; i += 2, j++)
29150 second[j] = gen_reg_rtx (hmode);
29151 ix86_expand_vector_init_concat (hmode, second [j],
29155 ix86_expand_vector_init_concat (mode, target, second, n);
29158 ix86_expand_vector_init_concat (mode, target, first, n);
29162 gcc_unreachable ();
29166 /* A subroutine of ix86_expand_vector_init_general. Use vector
29167 interleave to handle the most general case: all values variable,
29168 and none identical. */
29171 ix86_expand_vector_init_interleave (enum machine_mode mode,
29172 rtx target, rtx *ops, int n)
29174 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29177 rtx (*gen_load_even) (rtx, rtx, rtx);
29178 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29179 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29184 gen_load_even = gen_vec_setv8hi;
29185 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29186 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29187 inner_mode = HImode;
29188 first_imode = V4SImode;
29189 second_imode = V2DImode;
29190 third_imode = VOIDmode;
29193 gen_load_even = gen_vec_setv16qi;
29194 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29195 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29196 inner_mode = QImode;
29197 first_imode = V8HImode;
29198 second_imode = V4SImode;
29199 third_imode = V2DImode;
29202 gcc_unreachable ();
29205 for (i = 0; i < n; i++)
29207 /* Extend the odd element to SImode using a paradoxical SUBREG. */
29208 op0 = gen_reg_rtx (SImode);
29209 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29211 /* Insert the SImode value as low element of V4SImode vector. */
29212 op1 = gen_reg_rtx (V4SImode);
29213 op0 = gen_rtx_VEC_MERGE (V4SImode,
29214 gen_rtx_VEC_DUPLICATE (V4SImode,
29216 CONST0_RTX (V4SImode),
29218 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
29220 /* Cast the V4SImode vector back to a vector in the original mode. */
29221 op0 = gen_reg_rtx (mode);
29222 emit_move_insn (op0, gen_lowpart (mode, op1));
29224 /* Load even elements into the second position. */
29225 emit_insn (gen_load_even (op0,
29226 force_reg (inner_mode,
29230 /* Cast vector to FIRST_IMODE vector. */
29231 ops[i] = gen_reg_rtx (first_imode);
29232 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29235 /* Interleave low FIRST_IMODE vectors. */
29236 for (i = j = 0; i < n; i += 2, j++)
29238 op0 = gen_reg_rtx (first_imode);
29239 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29241 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29242 ops[j] = gen_reg_rtx (second_imode);
29243 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29246 /* Interleave low SECOND_IMODE vectors. */
29247 switch (second_imode)
29250 for (i = j = 0; i < n / 2; i += 2, j++)
29252 op0 = gen_reg_rtx (second_imode);
29253 emit_insn (gen_interleave_second_low (op0, ops[i],
29256 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
29257 vector. */
29258 ops[j] = gen_reg_rtx (third_imode);
29259 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29261 second_imode = V2DImode;
29262 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29266 op0 = gen_reg_rtx (second_imode);
29267 emit_insn (gen_interleave_second_low (op0, ops[0],
29270 /* Cast the SECOND_IMODE vector back to a vector in the original
29271 mode. */
29272 emit_insn (gen_rtx_SET (VOIDmode, target,
29273 gen_lowpart (mode, op0)));
29277 gcc_unreachable ();
29281 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29282 all values variable, and none identical. */
29285 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29286 rtx target, rtx vals)
29288 rtx ops[32], op0, op1;
29289 enum machine_mode half_mode = VOIDmode;
29296 if (!mmx_ok && !TARGET_SSE)
29308 n = GET_MODE_NUNITS (mode);
29309 for (i = 0; i < n; i++)
29310 ops[i] = XVECEXP (vals, 0, i);
29311 ix86_expand_vector_init_concat (mode, target, ops, n);
29315 half_mode = V16QImode;
29319 half_mode = V8HImode;
29323 n = GET_MODE_NUNITS (mode);
29324 for (i = 0; i < n; i++)
29325 ops[i] = XVECEXP (vals, 0, i);
29326 op0 = gen_reg_rtx (half_mode);
29327 op1 = gen_reg_rtx (half_mode);
29328 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29330 ix86_expand_vector_init_interleave (half_mode, op1,
29331 &ops [n >> 1], n >> 2);
29332 emit_insn (gen_rtx_SET (VOIDmode, target,
29333 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29337 if (!TARGET_SSE4_1)
29345 /* Don't use ix86_expand_vector_init_interleave if we can't
29346 move from GPR to SSE register directly. */
29347 if (!TARGET_INTER_UNIT_MOVES)
29350 n = GET_MODE_NUNITS (mode);
29351 for (i = 0; i < n; i++)
29352 ops[i] = XVECEXP (vals, 0, i);
29353 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29361 gcc_unreachable ();
29365 int i, j, n_elts, n_words, n_elt_per_word;
29366 enum machine_mode inner_mode;
29367 rtx words[4], shift;
29369 inner_mode = GET_MODE_INNER (mode);
29370 n_elts = GET_MODE_NUNITS (mode);
29371 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29372 n_elt_per_word = n_elts / n_words;
29373 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29375 for (i = 0; i < n_words; ++i)
29377 rtx word = NULL_RTX;
29379 for (j = 0; j < n_elt_per_word; ++j)
29381 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29382 elt = convert_modes (word_mode, inner_mode, elt, true);
29388 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29389 word, 1, OPTAB_LIB_WIDEN);
29390 word = expand_simple_binop (word_mode, IOR, word, elt,
29391 word, 1, OPTAB_LIB_WIDEN);
29399 emit_move_insn (target, gen_lowpart (mode, words[0]));
29400 else if (n_words == 2)
29402 rtx tmp = gen_reg_rtx (mode);
29403 emit_clobber (tmp);
29404 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29405 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29406 emit_move_insn (target, tmp);
29408 else if (n_words == 4)
29410 rtx tmp = gen_reg_rtx (V4SImode);
29411 gcc_assert (word_mode == SImode);
29412 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29413 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29414 emit_move_insn (target, gen_lowpart (mode, tmp));
29417 gcc_unreachable ();
29421 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29422 instructions unless MMX_OK is true. */
29425 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29427 enum machine_mode mode = GET_MODE (target);
29428 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29429 int n_elts = GET_MODE_NUNITS (mode);
29430 int n_var = 0, one_var = -1;
29431 bool all_same = true, all_const_zero = true;
29435 for (i = 0; i < n_elts; ++i)
29437 x = XVECEXP (vals, 0, i);
29438 if (!(CONST_INT_P (x)
29439 || GET_CODE (x) == CONST_DOUBLE
29440 || GET_CODE (x) == CONST_FIXED))
29441 n_var++, one_var = i;
29442 else if (x != CONST0_RTX (inner_mode))
29443 all_const_zero = false;
29444 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29448 /* Constants are best loaded from the constant pool. */
29451 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29455 /* If all values are identical, broadcast the value. */
29457 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29458 XVECEXP (vals, 0, 0)))
29461 /* Values where only one field is non-constant are best loaded from
29462 the pool and overwritten via move later. */
29466 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29467 XVECEXP (vals, 0, one_var),
29471 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29475 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29479 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29481 enum machine_mode mode = GET_MODE (target);
29482 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29483 enum machine_mode half_mode;
29484 bool use_vec_merge = false;
29486 static rtx (*gen_extract[6][2]) (rtx, rtx)
29488 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29489 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29490 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29491 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29492 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29493 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29495 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29497 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29498 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29499 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29500 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29501 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29502 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29512 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29513 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29515 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29517 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29518 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29524 use_vec_merge = TARGET_SSE4_1;
29532 /* For the two element vectors, we implement a VEC_CONCAT with
29533 the extraction of the other element. */
29535 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29536 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29539 op0 = val, op1 = tmp;
29541 op0 = tmp, op1 = val;
29543 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29544 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29549 use_vec_merge = TARGET_SSE4_1;
29556 use_vec_merge = true;
29560 /* tmp = target = A B C D */
29561 tmp = copy_to_reg (target);
29562 /* target = A A B B */
29563 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29564 /* target = X A B B */
29565 ix86_expand_vector_set (false, target, val, 0);
29566 /* target = A X C D */
29567 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29568 const1_rtx, const0_rtx,
29569 GEN_INT (2+4), GEN_INT (3+4)));
29573 /* tmp = target = A B C D */
29574 tmp = copy_to_reg (target);
29575 /* tmp = X B C D */
29576 ix86_expand_vector_set (false, tmp, val, 0);
29577 /* target = A B X D */
29578 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29579 const0_rtx, const1_rtx,
29580 GEN_INT (0+4), GEN_INT (3+4)));
29584 /* tmp = target = A B C D */
29585 tmp = copy_to_reg (target);
29586 /* tmp = X B C D */
29587 ix86_expand_vector_set (false, tmp, val, 0);
29588 /* target = A B C X */
29589 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29590 const0_rtx, const1_rtx,
29591 GEN_INT (2+4), GEN_INT (0+4)));
29595 gcc_unreachable ();
29600 use_vec_merge = TARGET_SSE4_1;
29604 /* Element 0 handled by vec_merge below. */
29607 use_vec_merge = true;
29613 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29614 store into element 0, then shuffle them back. */
29618 order[0] = GEN_INT (elt);
29619 order[1] = const1_rtx;
29620 order[2] = const2_rtx;
29621 order[3] = GEN_INT (3);
29622 order[elt] = const0_rtx;
29624 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29625 order[1], order[2], order[3]));
29627 ix86_expand_vector_set (false, target, val, 0);
29629 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29630 order[1], order[2], order[3]));
29634 /* For SSE1, we have to reuse the V4SF code. */
29635 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29636 gen_lowpart (SFmode, val), elt);
29641 use_vec_merge = TARGET_SSE2;
29644 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29648 use_vec_merge = TARGET_SSE4_1;
29655 half_mode = V16QImode;
29661 half_mode = V8HImode;
29667 half_mode = V4SImode;
29673 half_mode = V2DImode;
29679 half_mode = V4SFmode;
29685 half_mode = V2DFmode;
29691 /* Compute offset. */
29695 gcc_assert (i <= 1);
29697 /* Extract the half. */
29698 tmp = gen_reg_rtx (half_mode);
29699 emit_insn (gen_extract[j][i] (tmp, target));
29701 /* Put val in tmp at elt. */
29702 ix86_expand_vector_set (false, tmp, val, elt);
29705 emit_insn (gen_insert[j][i] (target, target, tmp));
29714 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29715 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29716 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29720 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29722 emit_move_insn (mem, target);
29724 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29725 emit_move_insn (tmp, val);
29727 emit_move_insn (target, mem);
29732 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29734 enum machine_mode mode = GET_MODE (vec);
29735 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29736 bool use_vec_extr = false;
29749 use_vec_extr = true;
29753 use_vec_extr = TARGET_SSE4_1;
29765 tmp = gen_reg_rtx (mode);
29766 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29767 GEN_INT (elt), GEN_INT (elt),
29768 GEN_INT (elt+4), GEN_INT (elt+4)));
29772 tmp = gen_reg_rtx (mode);
29773 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29777 gcc_unreachable ();
29780 use_vec_extr = true;
29785 use_vec_extr = TARGET_SSE4_1;
29799 tmp = gen_reg_rtx (mode);
29800 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29801 GEN_INT (elt), GEN_INT (elt),
29802 GEN_INT (elt), GEN_INT (elt)));
29806 tmp = gen_reg_rtx (mode);
29807 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29811 gcc_unreachable ();
29814 use_vec_extr = true;
29819 /* For SSE1, we have to reuse the V4SF code. */
29820 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29821 gen_lowpart (V4SFmode, vec), elt);
29827 use_vec_extr = TARGET_SSE2;
29830 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29834 use_vec_extr = TARGET_SSE4_1;
29838 /* ??? Could extract the appropriate HImode element and shift. */
29845 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29846 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29848 /* Let the rtl optimizers know about the zero extension performed. */
29849 if (inner_mode == QImode || inner_mode == HImode)
29851 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29852 target = gen_lowpart (SImode, target);
29855 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29859 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29861 emit_move_insn (mem, vec);
29863 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29864 emit_move_insn (target, tmp);
29868 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29869 pattern to reduce; DEST is the destination; IN is the input vector. */
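/* movhlps folds elements 2 and 3 onto 0 and 1, FN combines the pairs,
shufps then broadcasts pair-result 1 so that a final FN leaves the
complete reduction in element 0. */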
29872 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29874 rtx tmp1, tmp2, tmp3;
29876 tmp1 = gen_reg_rtx (V4SFmode);
29877 tmp2 = gen_reg_rtx (V4SFmode);
29878 tmp3 = gen_reg_rtx (V4SFmode);
29880 emit_insn (gen_sse_movhlps (tmp1, in, in));
29881 emit_insn (fn (tmp2, tmp1, in));
29883 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29884 const1_rtx, const1_rtx,
29885 GEN_INT (1+4), GEN_INT (1+4)));
29886 emit_insn (fn (dest, tmp2, tmp3));
29889 /* Target hook for scalar_mode_supported_p. */
29891 ix86_scalar_mode_supported_p (enum machine_mode mode)
29893 if (DECIMAL_FLOAT_MODE_P (mode))
29894 return default_decimal_float_supported_p ();
29895 else if (mode == TFmode)
29898 return default_scalar_mode_supported_p (mode);
29901 /* Implements target hook vector_mode_supported_p. */
29903 ix86_vector_mode_supported_p (enum machine_mode mode)
29905 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29907 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29909 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29911 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29913 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29918 /* Target hook for c_mode_for_suffix. */
29919 static enum machine_mode
29920 ix86_c_mode_for_suffix (char suffix)
29930 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29932 We do this in the new i386 backend to maintain source compatibility
29933 with the old cc0-based compiler. */
29936 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29937 tree inputs ATTRIBUTE_UNUSED,
29940 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29942 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29947 /* Implements the target hook targetm.encode_section_info. This
29948 is not used by netware. */
29950 static void ATTRIBUTE_UNUSED
29951 ix86_encode_section_info (tree decl, rtx rtl, int first)
29953 default_encode_section_info (decl, rtl, first);
29955 if (TREE_CODE (decl) == VAR_DECL
29956 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29957 && ix86_in_large_data_p (decl))
29958 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29961 /* Worker function for REVERSE_CONDITION. */
29964 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29966 return (mode != CCFPmode && mode != CCFPUmode
29967 ? reverse_condition (code)
29968 : reverse_condition_maybe_unordered (code));
29971 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29972 to OPERANDS[0]. */
29975 output_387_reg_move (rtx insn, rtx *operands)
29977 if (REG_P (operands[0]))
29979 if (REG_P (operands[1])
29980 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29982 if (REGNO (operands[0]) == FIRST_STACK_REG)
29983 return output_387_ffreep (operands, 0);
29984 return "fstp\t%y0";
29986 if (STACK_TOP_P (operands[0]))
29987 return "fld%Z1\t%y1";
29990 else if (MEM_P (operands[0]))
29992 gcc_assert (REG_P (operands[1]));
29993 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29994 return "fstp%Z0\t%y0";
29997 /* There is no non-popping store to memory for XFmode.
29998 So if we need one, follow the store with a load. */
29999 if (GET_MODE (operands[0]) == XFmode)
30000 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
30002 return "fst%Z0\t%y0";
30009 /* Output code to perform a conditional jump to LABEL, if C2 flag in
30010 FP status register is set. */
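/* Two sequences are emitted here: "fnstsw %ax; sahf" and a jump on PF
when sahf is usable, otherwise "fnstsw %ax; testb $4, %ah" and jne,
bit 2 of the high status byte being C2. */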
30013 ix86_emit_fp_unordered_jump (rtx label)
30015 rtx reg = gen_reg_rtx (HImode);
30018 emit_insn (gen_x86_fnstsw_1 (reg));
30020 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
30022 emit_insn (gen_x86_sahf_1 (reg));
30024 temp = gen_rtx_REG (CCmode, FLAGS_REG);
30025 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
30029 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
30031 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
30032 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
30035 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
30036 gen_rtx_LABEL_REF (VOIDmode, label),
30038 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
30040 emit_jump_insn (temp);
30041 predict_jump (REG_BR_PROB_BASE * 10 / 100);
30044 /* Output code to perform a log1p XFmode calculation. */
30046 void ix86_emit_i387_log1p (rtx op0, rtx op1)
30048 rtx label1 = gen_label_rtx ();
30049 rtx label2 = gen_label_rtx ();
30051 rtx tmp = gen_reg_rtx (XFmode);
30052 rtx tmp2 = gen_reg_rtx (XFmode);
30055 emit_insn (gen_absxf2 (tmp, op1));
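/* fyl2xp1 is only specified for |op1| < 1 - sqrt(2)/2, about 0.29289;
above that threshold branch to the fyl2x path, which computes
log(1 + op1) directly. */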
30056 test = gen_rtx_GE (VOIDmode, tmp,
30057 CONST_DOUBLE_FROM_REAL_VALUE (
30058 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
30060 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
30062 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30063 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
30064 emit_jump (label2);
30066 emit_label (label1);
30067 emit_move_insn (tmp, CONST1_RTX (XFmode));
30068 emit_insn (gen_addxf3 (tmp, op1, tmp));
30069 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30070 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
30072 emit_label (label2);
30075 /* Output code to perform a Newton-Raphson approximation of a single precision
30076 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
30078 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
30080 rtx x0, x1, e0, e1, two;
30082 x0 = gen_reg_rtx (mode);
30083 e0 = gen_reg_rtx (mode);
30084 e1 = gen_reg_rtx (mode);
30085 x1 = gen_reg_rtx (mode);
30087 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30089 if (VECTOR_MODE_P (mode))
30090 two = ix86_build_const_vector (mode, true, two);
30092 two = force_reg (mode, two);
30094 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
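/* This is one Newton-Raphson step for the reciprocal, x1 = x0 *
(2.0 - b * x0), which roughly doubles the accurate bits of the rcp
estimate x0; the multiply by a is folded in via e0 = a * x0. */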
30096 /* x0 = rcp(b) estimate */
30097 emit_insn (gen_rtx_SET (VOIDmode, x0,
30098 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30101 emit_insn (gen_rtx_SET (VOIDmode, e0,
30102 gen_rtx_MULT (mode, x0, a)));
30104 emit_insn (gen_rtx_SET (VOIDmode, e1,
30105 gen_rtx_MULT (mode, x0, b)));
30107 emit_insn (gen_rtx_SET (VOIDmode, x1,
30108 gen_rtx_MINUS (mode, two, e1)));
30109 /* res = e0 * x1 */
30110 emit_insn (gen_rtx_SET (VOIDmode, res,
30111 gen_rtx_MULT (mode, e0, x1)));
30114 /* Output code to perform a Newton-Raphson approximation of a
30115 single precision floating point [reciprocal] square root. */
30117 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
30120 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30123 x0 = gen_reg_rtx (mode);
30124 e0 = gen_reg_rtx (mode);
30125 e1 = gen_reg_rtx (mode);
30126 e2 = gen_reg_rtx (mode);
30127 e3 = gen_reg_rtx (mode);
30129 real_from_integer (&r, VOIDmode, -3, -1, 0);
30130 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30132 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30133 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30135 if (VECTOR_MODE_P (mode))
30137 mthree = ix86_build_const_vector (mode, true, mthree);
30138 mhalf = ix86_build_const_vector (mode, true, mhalf);
30141 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30142 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
30144 /* x0 = rsqrt(a) estimate */
30145 emit_insn (gen_rtx_SET (VOIDmode, x0,
30146 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
30149 /* If a == 0.0, filter out infinity to prevent NaN for sqrt(0.0). */
30154 zero = gen_reg_rtx (mode);
30155 mask = gen_reg_rtx (mode);
30157 zero = force_reg (mode, CONST0_RTX(mode));
30158 emit_insn (gen_rtx_SET (VOIDmode, mask,
30159 gen_rtx_NE (mode, zero, a)));
30161 emit_insn (gen_rtx_SET (VOIDmode, x0,
30162 gen_rtx_AND (mode, x0, mask)));
30166 emit_insn (gen_rtx_SET (VOIDmode, e0,
30167 gen_rtx_MULT (mode, x0, a)));
30169 emit_insn (gen_rtx_SET (VOIDmode, e1,
30170 gen_rtx_MULT (mode, e0, x0)));
30173 mthree = force_reg (mode, mthree);
30174 emit_insn (gen_rtx_SET (VOIDmode, e2,
30175 gen_rtx_PLUS (mode, e1, mthree)));
30177 mhalf = force_reg (mode, mhalf);
30179 /* e3 = -.5 * x0 */
30180 emit_insn (gen_rtx_SET (VOIDmode, e3,
30181 gen_rtx_MULT (mode, x0, mhalf)));
30183 /* e3 = -.5 * e0 */
30184 emit_insn (gen_rtx_SET (VOIDmode, e3,
30185 gen_rtx_MULT (mode, e0, mhalf)));
30186 /* ret = e2 * e3 */
30187 emit_insn (gen_rtx_SET (VOIDmode, res,
30188 gen_rtx_MULT (mode, e2, e3)));
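/* As a sketch, the above is one Newton-Raphson step on the hardware
   rsqrtss/rsqrtps estimate (~12 bits):

     x0  = rsqrt (a);
     e0  = x0 * a;
     e1  = e0 * x0;              a * x0 * x0
     e2  = e1 - 3.0;             mthree holds -3.0, added above
     e3  = -0.5 * x0;            or -0.5 * e0 when computing sqrt
     res = e2 * e3;

   i.e. the usual x1 = 0.5 * x0 * (3 - a*x0*x0), with the signs
   folded into the -3.0 and -0.5 constants.  The a == 0.0 masking
   above keeps sqrt (0.0) from evaluating inf * 0.0 = NaN.  */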
30191 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30193 static void ATTRIBUTE_UNUSED
30194 i386_solaris_elf_named_section (const char *name, unsigned int flags,
30197 /* With Binutils 2.15, the "@unwind" marker must be specified on
30198 every occurrence of the ".eh_frame" section, not just the first
30201 && strcmp (name, ".eh_frame") == 0)
30203 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30204 flags & SECTION_WRITE ? "aw" : "a");
30207 default_elf_asm_named_section (name, flags, decl);
30210 /* Return the mangling of TYPE if it is an extended fundamental type. */
30212 static const char *
30213 ix86_mangle_type (const_tree type)
30215 type = TYPE_MAIN_VARIANT (type);
30217 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30218 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30221 switch (TYPE_MODE (type))
30224 /* __float128 is "g". */
30227 /* "long double" or __float80 is "e". */
30234 /* For 32-bit code we can save PIC register setup by using the
30235 __stack_chk_fail_local hidden function instead of calling
30236 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
30237 register, so it is better to call __stack_chk_fail directly. */
30240 ix86_stack_protect_fail (void)
30242 return TARGET_64BIT
30243 ? default_external_stack_protect_fail ()
30244 : default_hidden_stack_protect_fail ();
30247 /* Select a format to encode pointers in exception handling data. CODE
30248 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30249 true if the symbol may be affected by dynamic relocations.
30251 ??? All x86 object file formats are capable of representing this.
30252 After all, the relocation needed is the same as for the call insn.
30253 Whether or not a particular assembler allows us to enter such, I
30254 guess we'll have to see. */
30256 asm_preferred_eh_data_format (int code, int global)
30260 int type = DW_EH_PE_sdata8;
30262 || ix86_cmodel == CM_SMALL_PIC
30263 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30264 type = DW_EH_PE_sdata4;
30265 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30267 if (ix86_cmodel == CM_SMALL
30268 || (ix86_cmodel == CM_MEDIUM && code))
30269 return DW_EH_PE_udata4;
30270 return DW_EH_PE_absptr;
30273 /* Expand copysign from SIGN to the positive value ABS_VALUE
30274 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
30277 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30279 enum machine_mode mode = GET_MODE (sign);
30280 rtx sgn = gen_reg_rtx (mode);
30281 if (mask == NULL_RTX)
30283 enum machine_mode vmode;
30285 if (mode == SFmode)
30287 else if (mode == DFmode)
30292 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
30293 if (!VECTOR_MODE_P (mode))
30295 /* We need to generate a scalar mode mask in this case. */
30296 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30297 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30298 mask = gen_reg_rtx (mode);
30299 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30303 mask = gen_rtx_NOT (mode, mask);
30304 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30305 gen_rtx_AND (mode, mask, sign)));
30306 emit_insn (gen_rtx_SET (VOIDmode, result,
30307 gen_rtx_IOR (mode, abs_value, sgn)));
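/* Net effect, as a bit-level sketch: with SIGNBIT_MASK being the
   sign-bit mask of the mode (0x80000000 for SFmode),

     sgn    = sign & SIGNBIT_MASK;      isolate the sign bit
     result = abs_value | sgn;          graft it onto the magnitude

   When MASK is passed in it is the inverted (fabs) mask, which is
   why it gets complemented before the AND.  */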
30310 /* Expand fabs (OP0) and return a new rtx that holds the result. The
30311 mask for masking out the sign-bit is stored in *SMASK, if that is
30314 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30316 enum machine_mode vmode, mode = GET_MODE (op0);
30319 xa = gen_reg_rtx (mode);
30320 if (mode == SFmode)
30322 else if (mode == DFmode)
30326 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
30327 if (!VECTOR_MODE_P (mode))
30329 /* We need to generate a scalar mode mask in this case. */
30330 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30331 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30332 mask = gen_reg_rtx (mode);
30333 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30335 emit_insn (gen_rtx_SET (VOIDmode, xa,
30336 gen_rtx_AND (mode, op0, mask)));
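/* The AND above clears the sign bit; for DFmode this is effectively
   xa = x & 0x7fffffffffffffff, which is fabs (x) for every input
   including NaN.  The mask is also handed back via *SMASK so that
   callers can reapply the original sign with copysign later.  */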
30344 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30345 swapping the operands if SWAP_OPERANDS is true. The expanded
30346 code is a forward jump to a newly created label in case the
30347 comparison is true. The generated label rtx is returned. */
30349 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30350 bool swap_operands)
30361 label = gen_label_rtx ();
30362 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30363 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30364 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30365 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30366 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30367 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30368 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30369 JUMP_LABEL (tmp) = label;
30374 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30375 using comparison code CODE. Operands are swapped for the comparison if
30376 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30378 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30379 bool swap_operands)
30381 enum machine_mode mode = GET_MODE (op0);
30382 rtx mask = gen_reg_rtx (mode);
30391 if (mode == DFmode)
30392 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30393 gen_rtx_fmt_ee (code, mode, op0, op1)));
30395 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30396 gen_rtx_fmt_ee (code, mode, op0, op1)));
30401 /* Generate and return a rtx of mode MODE for 2**n where n is the number
30402 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
30404 ix86_gen_TWO52 (enum machine_mode mode)
30406 REAL_VALUE_TYPE TWO52r;
30409 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30410 TWO52 = const_double_from_real_value (TWO52r, mode);
30411 TWO52 = force_reg (mode, TWO52);
30416 /* Expand SSE sequence for computing lround from OP1 storing
30419 ix86_expand_lround (rtx op0, rtx op1)
30421 /* C code for the stuff we're doing below:
30422 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
30425 enum machine_mode mode = GET_MODE (op1);
30426 const struct real_format *fmt;
30427 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30430 /* load nextafter (0.5, 0.0) */
30431 fmt = REAL_MODE_FORMAT (mode);
30432 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30433 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30435 /* adj = copysign (0.5, op1) */
30436 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30437 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30439 /* adj = op1 + adj */
30440 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30442 /* op0 = (imode)adj */
30443 expand_fix (op0, adj, 0);
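/* Why nextafter (0.5, 0.0) rather than 0.5 (worked example, DFmode,
   where pred_half = 0.5 - 2^-54): for x = 0.5 - 2^-54, whose correct
   lround is 0, x + 0.5 is exactly 1 - 2^-54 and re-rounds up to 1.0,
   giving the wrong answer, while x + pred_half is 1 - 2^-53, exactly
   representable, and truncates to 0.  For x = 0.5 the sum is again
   1 - 2^-54, rounding up to 1.0, so exact halfway cases still round
   away from zero as lround requires.  */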
30446 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
30449 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
30451 /* C code for the stuff we're doing below (for do_floor):
30453 xi -= (double)xi > op1 ? 1 : 0;
30456 enum machine_mode fmode = GET_MODE (op1);
30457 enum machine_mode imode = GET_MODE (op0);
30458 rtx ireg, freg, label, tmp;
30460 /* reg = (long)op1 */
30461 ireg = gen_reg_rtx (imode);
30462 expand_fix (ireg, op1, 0);
30464 /* freg = (double)reg */
30465 freg = gen_reg_rtx (fmode);
30466 expand_float (freg, ireg, 0);
30468 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30469 label = ix86_expand_sse_compare_and_jump (UNLE,
30470 freg, op1, !do_floor);
30471 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30472 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30473 emit_move_insn (ireg, tmp);
30475 emit_label (label);
30476 LABEL_NUSES (label) = 1;
30478 emit_move_insn (op0, ireg);
30481 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30482 result in OPERAND0. */
30484 ix86_expand_rint (rtx operand0, rtx operand1)
30486 /* C code for the stuff we're doing below:
30487 xa = fabs (operand1);
30488 if (!isless (xa, 2**52))
30490 xa = xa + 2**52 - 2**52;
30491 return copysign (xa, operand1);
30493 enum machine_mode mode = GET_MODE (operand0);
30494 rtx res, xa, label, TWO52, mask;
30496 res = gen_reg_rtx (mode);
30497 emit_move_insn (res, operand1);
30499 /* xa = abs (operand1) */
30500 xa = ix86_expand_sse_fabs (res, &mask);
30502 /* if (!isless (xa, TWO52)) goto label; */
30503 TWO52 = ix86_gen_TWO52 (mode);
30504 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30506 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30507 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30509 ix86_sse_copysign_to_positive (res, xa, res, mask);
30511 emit_label (label);
30512 LABEL_NUSES (label) = 1;
30514 emit_move_insn (operand0, res);
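/* The TWO52 trick in one example: for DFmode, adding 2^52 to any
   0 <= xa < 2^52 pushes all fraction bits out of the 52-bit
   mantissa, so the FPU's round-to-nearest on the addition performs
   rint; the subtraction then recovers the integer.  E.g. xa = 3.7
   gives 3.7 + 2^52 -> 2^52 + 4, minus 2^52 -> 4.0.  Values at or
   above 2^52 are already integral, hence the early-exit branch.  */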
30517 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30520 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30522 /* C code for the stuff we expand below.
30523 double xa = fabs (x), x2;
30524 if (!isless (xa, TWO52))
30526 xa = xa + TWO52 - TWO52;
30527 x2 = copysign (xa, x);
30536 enum machine_mode mode = GET_MODE (operand0);
30537 rtx xa, TWO52, tmp, label, one, res, mask;
30539 TWO52 = ix86_gen_TWO52 (mode);
30541 /* Temporary for holding the result, initialized to the input
30542 operand to ease control flow. */
30543 res = gen_reg_rtx (mode);
30544 emit_move_insn (res, operand1);
30546 /* xa = abs (operand1) */
30547 xa = ix86_expand_sse_fabs (res, &mask);
30549 /* if (!isless (xa, TWO52)) goto label; */
30550 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30552 /* xa = xa + TWO52 - TWO52; */
30553 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30554 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30556 /* xa = copysign (xa, operand1) */
30557 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30559 /* generate 1.0 or -1.0 */
30560 one = force_reg (mode,
30561 const_double_from_real_value (do_floor
30562 ? dconst1 : dconstm1, mode));
30564 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30565 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30566 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30567 gen_rtx_AND (mode, one, tmp)));
30568 /* We always need to subtract here to preserve signed zero. */
30569 tmp = expand_simple_binop (mode, MINUS,
30570 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30571 emit_move_insn (res, tmp);
30573 emit_label (label);
30574 LABEL_NUSES (label) = 1;
30576 emit_move_insn (operand0, res);
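/* Worked example of the compensation step for floor with positive x:
   x = 3.5 gives xa = 4.0 after the TWO52 round (ties to even), the
   UNGT mask (xa > x) is all-ones, so 1.0 is subtracted and the
   result is 3.0.  For x = 2.5, xa rounds to 2.0, the mask is zero,
   and 2.0 already is the floor.  For ceil the constant is -1.0 and
   the compare operands are swapped, so subtracting -1.0 adds the 1
   whenever the rounding undershot.  */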
30579 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30582 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30584 /* C code for the stuff we expand below.
30585 double xa = fabs (x), x2;
30586 if (!isless (xa, TWO52))
30588 x2 = (double)(long)x;
30595 if (HONOR_SIGNED_ZEROS (mode))
30596 return copysign (x2, x);
30599 enum machine_mode mode = GET_MODE (operand0);
30600 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30602 TWO52 = ix86_gen_TWO52 (mode);
30604 /* Temporary for holding the result, initialized to the input
30605 operand to ease control flow. */
30606 res = gen_reg_rtx (mode);
30607 emit_move_insn (res, operand1);
30609 /* xa = abs (operand1) */
30610 xa = ix86_expand_sse_fabs (res, &mask);
30612 /* if (!isless (xa, TWO52)) goto label; */
30613 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30615 /* xa = (double)(long)x */
30616 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30617 expand_fix (xi, res, 0);
30618 expand_float (xa, xi, 0);
30621 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30623 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30624 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30625 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30626 gen_rtx_AND (mode, one, tmp)));
30627 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30628 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30629 emit_move_insn (res, tmp);
30631 if (HONOR_SIGNED_ZEROS (mode))
30632 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30634 emit_label (label);
30635 LABEL_NUSES (label) = 1;
30637 emit_move_insn (operand0, res);
30640 /* Expand SSE sequence for computing round from OPERAND1 storing
30641 into OPERAND0. This sequence works without relying on DImode truncation
30642 via cvttsd2siq, which is only available on 64-bit targets. */
30644 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30646 /* C code for the stuff we expand below.
30647 double xa = fabs (x), xa2, x2;
30648 if (!isless (xa, TWO52))
30650 Using the absolute value and copying back sign makes
30651 -0.0 -> -0.0 correct.
30652 xa2 = xa + TWO52 - TWO52;
30657 else if (dxa > 0.5)
30659 x2 = copysign (xa2, x);
30662 enum machine_mode mode = GET_MODE (operand0);
30663 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30665 TWO52 = ix86_gen_TWO52 (mode);
30667 /* Temporary for holding the result, initialized to the input
30668 operand to ease control flow. */
30669 res = gen_reg_rtx (mode);
30670 emit_move_insn (res, operand1);
30672 /* xa = abs (operand1) */
30673 xa = ix86_expand_sse_fabs (res, &mask);
30675 /* if (!isless (xa, TWO52)) goto label; */
30676 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30678 /* xa2 = xa + TWO52 - TWO52; */
30679 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30680 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30682 /* dxa = xa2 - xa; */
30683 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30685 /* generate 0.5, 1.0 and -0.5 */
30686 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30687 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30688 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30692 tmp = gen_reg_rtx (mode);
30693 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30694 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30695 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30696 gen_rtx_AND (mode, one, tmp)));
30697 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30698 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30699 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30700 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30701 gen_rtx_AND (mode, one, tmp)));
30702 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30704 /* res = copysign (xa2, operand1) */
30705 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30707 emit_label (label);
30708 LABEL_NUSES (label) = 1;
30710 emit_move_insn (operand0, res);
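/* Worked example: x = 2.5.  xa = 2.5, xa2 = 2.0 (the TWO52 rounding
   ties to even), so dxa = -0.5.  The dxa > 0.5 mask is false and the
   dxa <= -0.5 mask is true, so 1.0 is added, xa2 = 3.0, and the
   final copysign yields round (2.5) = 3.0, matching round's
   ties-away-from-zero semantics.  */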
30713 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30716 ix86_expand_trunc (rtx operand0, rtx operand1)
30718 /* C code for SSE variant we expand below.
30719 double xa = fabs (x), x2;
30720 if (!isless (xa, TWO52))
30722 x2 = (double)(long)x;
30723 if (HONOR_SIGNED_ZEROS (mode))
30724 return copysign (x2, x);
30727 enum machine_mode mode = GET_MODE (operand0);
30728 rtx xa, xi, TWO52, label, res, mask;
30730 TWO52 = ix86_gen_TWO52 (mode);
30732 /* Temporary for holding the result, initialized to the input
30733 operand to ease control flow. */
30734 res = gen_reg_rtx (mode);
30735 emit_move_insn (res, operand1);
30737 /* xa = abs (operand1) */
30738 xa = ix86_expand_sse_fabs (res, &mask);
30740 /* if (!isless (xa, TWO52)) goto label; */
30741 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30743 /* x = (double)(long)x */
30744 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30745 expand_fix (xi, res, 0);
30746 expand_float (res, xi, 0);
30748 if (HONOR_SIGNED_ZEROS (mode))
30749 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30751 emit_label (label);
30752 LABEL_NUSES (label) = 1;
30754 emit_move_insn (operand0, res);
30757 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30760 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30762 enum machine_mode mode = GET_MODE (operand0);
30763 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30765 /* C code for SSE variant we expand below.
30766 double xa = fabs (x), x2;
30767 if (!isless (xa, TWO52))
30769 xa2 = xa + TWO52 - TWO52;
30773 x2 = copysign (xa2, x);
30777 TWO52 = ix86_gen_TWO52 (mode);
30779 /* Temporary for holding the result, initialized to the input
30780 operand to ease control flow. */
30781 res = gen_reg_rtx (mode);
30782 emit_move_insn (res, operand1);
30784 /* xa = abs (operand1) */
30785 xa = ix86_expand_sse_fabs (res, &smask);
30787 /* if (!isless (xa, TWO52)) goto label; */
30788 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30790 /* res = xa + TWO52 - TWO52; */
30791 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30792 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30793 emit_move_insn (res, tmp);
30796 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30798 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30799 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30800 emit_insn (gen_rtx_SET (VOIDmode, mask,
30801 gen_rtx_AND (mode, mask, one)));
30802 tmp = expand_simple_binop (mode, MINUS,
30803 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30804 emit_move_insn (res, tmp);
30806 /* res = copysign (res, operand1) */
30807 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30809 emit_label (label);
30810 LABEL_NUSES (label) = 1;
30812 emit_move_insn (operand0, res);
30815 /* Expand SSE sequence for computing round from OPERAND1 storing
30818 ix86_expand_round (rtx operand0, rtx operand1)
30820 /* C code for the stuff we're doing below:
30821 double xa = fabs (x);
30822 if (!isless (xa, TWO52))
30824 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30825 return copysign (xa, x);
30827 enum machine_mode mode = GET_MODE (operand0);
30828 rtx res, TWO52, xa, label, xi, half, mask;
30829 const struct real_format *fmt;
30830 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30832 /* Temporary for holding the result, initialized to the input
30833 operand to ease control flow. */
30834 res = gen_reg_rtx (mode);
30835 emit_move_insn (res, operand1);
30837 TWO52 = ix86_gen_TWO52 (mode);
30838 xa = ix86_expand_sse_fabs (res, &mask);
30839 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30841 /* load nextafter (0.5, 0.0) */
30842 fmt = REAL_MODE_FORMAT (mode);
30843 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30844 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30846 /* xa = xa + 0.5 */
30847 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30848 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30850 /* xa = (double)(int64_t)xa */
30851 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30852 expand_fix (xi, xa, 0);
30853 expand_float (xa, xi, 0);
30855 /* res = copysign (xa, operand1) */
30856 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30858 emit_label (label);
30859 LABEL_NUSES (label) = 1;
30861 emit_move_insn (operand0, res);
30865 /* Table of valid machine attributes. */
30866 static const struct attribute_spec ix86_attribute_table[] =
30868 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30869 /* Stdcall attribute says callee is responsible for popping arguments
30870 if they are not variable. */
30871 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30872 /* Fastcall attribute says callee is responsible for popping arguments
30873 if they are not variable. */
30874 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30875 /* Thiscall attribute says callee is responsible for popping arguments
30876 if they are not variable. */
30877 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30878 /* Cdecl attribute says the callee is a normal C declaration */
30879 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30880 /* Regparm attribute specifies how many integer arguments are to be
30881 passed in registers. */
30882 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30883 /* Sseregparm attribute says we are using x86_64 calling conventions
30884 for FP arguments. */
30885 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30886 /* force_align_arg_pointer says this function realigns the stack at entry. */
30887 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30888 false, true, true, ix86_handle_cconv_attribute },
30889 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30890 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30891 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30892 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30894 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30895 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30896 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30897 SUBTARGET_ATTRIBUTE_TABLE,
30899 /* ms_abi and sysv_abi calling convention function attributes. */
30900 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30901 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30902 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30904 { NULL, 0, 0, false, false, false, NULL }
30907 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30909 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30910 tree vectype ATTRIBUTE_UNUSED,
30911 int misalign ATTRIBUTE_UNUSED)
30913 switch (type_of_cost)
30916 return ix86_cost->scalar_stmt_cost;
30919 return ix86_cost->scalar_load_cost;
30922 return ix86_cost->scalar_store_cost;
30925 return ix86_cost->vec_stmt_cost;
30928 return ix86_cost->vec_align_load_cost;
30931 return ix86_cost->vec_store_cost;
30933 case vec_to_scalar:
30934 return ix86_cost->vec_to_scalar_cost;
30936 case scalar_to_vec:
30937 return ix86_cost->scalar_to_vec_cost;
30939 case unaligned_load:
30940 case unaligned_store:
30941 return ix86_cost->vec_unalign_load_cost;
30943 case cond_branch_taken:
30944 return ix86_cost->cond_taken_branch_cost;
30946 case cond_branch_not_taken:
30947 return ix86_cost->cond_not_taken_branch_cost;
30953 gcc_unreachable ();
30958 /* Implement targetm.vectorize.builtin_vec_perm. */
30961 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30963 tree itype = TREE_TYPE (vec_type);
30964 bool u = TYPE_UNSIGNED (itype);
30965 enum machine_mode vmode = TYPE_MODE (vec_type);
30966 enum ix86_builtins fcode;
30967 bool ok = TARGET_SSE2;
30973 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30976 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30978 itype = ix86_get_builtin_type (IX86_BT_DI);
30983 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30987 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30989 itype = ix86_get_builtin_type (IX86_BT_SI);
30993 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30996 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30999 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
31002 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
31012 *mask_type = itype;
31013 return ix86_builtins[(int) fcode];
31016 /* Return a vector mode with twice as many elements as VMODE. */
31017 /* ??? Consider moving this to a table generated by genmodes.c. */
31019 static enum machine_mode
31020 doublesize_vector_mode (enum machine_mode vmode)
31024 case V2SFmode: return V4SFmode;
31025 case V1DImode: return V2DImode;
31026 case V2SImode: return V4SImode;
31027 case V4HImode: return V8HImode;
31028 case V8QImode: return V16QImode;
31030 case V2DFmode: return V4DFmode;
31031 case V4SFmode: return V8SFmode;
31032 case V2DImode: return V4DImode;
31033 case V4SImode: return V8SImode;
31034 case V8HImode: return V16HImode;
31035 case V16QImode: return V32QImode;
31037 case V4DFmode: return V8DFmode;
31038 case V8SFmode: return V16SFmode;
31039 case V4DImode: return V8DImode;
31040 case V8SImode: return V16SImode;
31041 case V16HImode: return V32HImode;
31042 case V32QImode: return V64QImode;
31045 gcc_unreachable ();
31049 /* Construct (set target (vec_select op0 (parallel perm))) and
31050 return true if that's a valid instruction in the active ISA. */
31053 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
31055 rtx rperm[MAX_VECT_LEN], x;
31058 for (i = 0; i < nelt; ++i)
31059 rperm[i] = GEN_INT (perm[i]);
31061 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
31062 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
31063 x = gen_rtx_SET (VOIDmode, target, x);
31066 if (recog_memoized (x) < 0)
31074 /* Similar, but generate a vec_concat from op0 and op1 as well. */
31077 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
31078 const unsigned char *perm, unsigned nelt)
31080 enum machine_mode v2mode;
31083 v2mode = doublesize_vector_mode (GET_MODE (op0));
31084 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
31085 return expand_vselect (target, x, perm, nelt);
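/* For illustration, the RTL these two helpers build for V4SFmode
   with perm = { 0, 4, 1, 5 } is

     (set (reg:V4SF target)
          (vec_select:V4SF
            (vec_concat:V8SF (reg:V4SF op0) (reg:V4SF op1))
            (parallel [0 4 1 5])))

   which recog_memoized can match as unpcklps when SSE is enabled.  */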
31088 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31089 in terms of blendp[sd] / pblendw / pblendvb. */
31092 expand_vec_perm_blend (struct expand_vec_perm_d *d)
31094 enum machine_mode vmode = d->vmode;
31095 unsigned i, mask, nelt = d->nelt;
31096 rtx target, op0, op1, x;
31098 if (!TARGET_SSE4_1 || d->op0 == d->op1)
31100 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
31103 /* This is a blend, not a permute. Elements must stay in their
31104 respective lanes. */
31105 for (i = 0; i < nelt; ++i)
31107 unsigned e = d->perm[i];
31108 if (!(e == i || e == i + nelt))
31115 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31116 decision should be extracted elsewhere, so that we only try that
31117 sequence once all budget==3 options have been tried. */
31119 /* For bytes, see if bytes move in pairs so we can use pblendw with
31120 an immediate argument, rather than pblendvb with a vector argument. */
31121 if (vmode == V16QImode)
31123 bool pblendw_ok = true;
31124 for (i = 0; i < 16 && pblendw_ok; i += 2)
31125 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31129 rtx rperm[16], vperm;
31131 for (i = 0; i < nelt; ++i)
31132 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31134 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31135 vperm = force_reg (V16QImode, vperm);
31137 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31142 target = d->target;
31154 for (i = 0; i < nelt; ++i)
31155 mask |= (d->perm[i] >= nelt) << i;
31159 for (i = 0; i < 2; ++i)
31160 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31164 for (i = 0; i < 4; ++i)
31165 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31169 for (i = 0; i < 8; ++i)
31170 mask |= (d->perm[i * 2] >= 16) << i;
31174 target = gen_lowpart (vmode, target);
31175 op0 = gen_lowpart (vmode, op0);
31176 op1 = gen_lowpart (vmode, op1);
31180 gcc_unreachable ();
31183 /* This matches five different patterns with the different modes. */
31184 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31185 x = gen_rtx_SET (VOIDmode, target, x);
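/* Example of the immediate computed above: for V8HImode with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, bit i of the mask is set
   exactly when element i is taken from op1, giving mask = 0xaa and
   a single pblendw $0xaa.  */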
31191 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31192 in terms of the variable form of vpermilps.
31194 Note that we will have already failed the immediate input vpermilps,
31195 which requires that the high and low part shuffle be identical; the
31196 variable form doesn't require that. */
31199 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31201 rtx rperm[8], vperm;
31204 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31207 /* We can only permute within the 128-bit lane. */
31208 for (i = 0; i < 8; ++i)
31210 unsigned e = d->perm[i];
31211 if (i < 4 ? e >= 4 : e < 4)
31218 for (i = 0; i < 8; ++i)
31220 unsigned e = d->perm[i];
31222 /* Within each 128-bit lane, the elements of op0 are numbered
31223 from 0 and the elements of op1 are numbered from 4. */
31229 rperm[i] = GEN_INT (e);
31232 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31233 vperm = force_reg (V8SImode, vperm);
31234 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31239 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31240 in terms of pshufb or vpperm. */
31243 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31245 unsigned i, nelt, eltsz;
31246 rtx rperm[16], vperm, target, op0, op1;
31248 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31250 if (GET_MODE_SIZE (d->vmode) != 16)
31257 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31259 for (i = 0; i < nelt; ++i)
31261 unsigned j, e = d->perm[i];
31262 for (j = 0; j < eltsz; ++j)
31263 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31266 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31267 vperm = force_reg (V16QImode, vperm);
31269 target = gen_lowpart (V16QImode, d->target);
31270 op0 = gen_lowpart (V16QImode, d->op0);
31271 if (d->op0 == d->op1)
31272 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31275 op1 = gen_lowpart (V16QImode, d->op1);
31276 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
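/* Example of the byte-selector expansion above: a V4SImode
   permutation { 1, 0, 3, 2 } (eltsz = 4) becomes the 16 selectors

     { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }

   since dword element e expands to bytes 4*e .. 4*e + 3.  */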
31282 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31283 in a single instruction. */
31286 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31288 unsigned i, nelt = d->nelt;
31289 unsigned char perm2[MAX_VECT_LEN];
31291 /* Check plain VEC_SELECT first, because AVX has instructions that could
31292 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31293 input where SEL+CONCAT may not. */
31294 if (d->op0 == d->op1)
31296 int mask = nelt - 1;
31298 for (i = 0; i < nelt; i++)
31299 perm2[i] = d->perm[i] & mask;
31301 if (expand_vselect (d->target, d->op0, perm2, nelt))
31304 /* There are plenty of patterns in sse.md that are written for
31305 SEL+CONCAT and are not replicated for a single op. Perhaps
31306 that should be changed, to avoid the nastiness here. */
31308 /* Recognize interleave style patterns, which means incrementing
31309 every other permutation operand. */
31310 for (i = 0; i < nelt; i += 2)
31312 perm2[i] = d->perm[i] & mask;
31313 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31315 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31318 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31321 for (i = 0; i < nelt; i += 4)
31323 perm2[i + 0] = d->perm[i + 0] & mask;
31324 perm2[i + 1] = d->perm[i + 1] & mask;
31325 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31326 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31329 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31334 /* Finally, try the fully general two operand permute. */
31335 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31338 /* Recognize interleave style patterns with reversed operands. */
31339 if (d->op0 != d->op1)
31341 for (i = 0; i < nelt; ++i)
31343 unsigned e = d->perm[i];
31351 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31355 /* Try the SSE4.1 blend variable merge instructions. */
31356 if (expand_vec_perm_blend (d))
31359 /* Try one of the AVX vpermil variable permutations. */
31360 if (expand_vec_perm_vpermil (d))
31363 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31364 if (expand_vec_perm_pshufb (d))
31370 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31371 in terms of a pair of pshuflw + pshufhw instructions. */
31374 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31376 unsigned char perm2[MAX_VECT_LEN];
31380 if (d->vmode != V8HImode || d->op0 != d->op1)
31383 /* The two permutations only operate in 64-bit lanes. */
31384 for (i = 0; i < 4; ++i)
31385 if (d->perm[i] >= 4)
31387 for (i = 4; i < 8; ++i)
31388 if (d->perm[i] < 4)
31394 /* Emit the pshuflw. */
31395 memcpy (perm2, d->perm, 4);
31396 for (i = 4; i < 8; ++i)
31398 ok = expand_vselect (d->target, d->op0, perm2, 8);
31401 /* Emit the pshufhw. */
31402 memcpy (perm2 + 4, d->perm + 4, 4);
31403 for (i = 0; i < 4; ++i)
31405 ok = expand_vselect (d->target, d->target, perm2, 8);
31411 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31412 the permutation using the SSSE3 palignr instruction. This succeeds
31413 when all of the elements in PERM fit within one vector and we merely
31414 need to shift them down so that a single vector permutation has a
31415 chance to succeed. */
31418 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31420 unsigned i, nelt = d->nelt;
31425 /* Even with AVX, palignr only operates on 128-bit vectors. */
31426 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31429 min = nelt, max = 0;
31430 for (i = 0; i < nelt; ++i)
31432 unsigned e = d->perm[i];
31438 if (min == 0 || max - min >= nelt)
31441 /* Given that we have SSSE3, we know we'll be able to implement the
31442 single operand permutation after the palignr with pshufb. */
31446 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31447 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31448 gen_lowpart (TImode, d->op1),
31449 gen_lowpart (TImode, d->op0), shift));
31451 d->op0 = d->op1 = d->target;
31454 for (i = 0; i < nelt; ++i)
31456 unsigned e = d->perm[i] - min;
31462 /* Test for the degenerate case where the alignment by itself
31463 produces the desired permutation. */
31467 ok = expand_vec_perm_1 (d);
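/* Example: V8HImode with perm = { 3 4 5 6 7 8 9 10 } has min = 3,
   so palignr shifts the op1:op0 concatenation right by three
   elements; the residual permutation is the identity { 0 1 ... 7 },
   i.e. the degenerate case above, and no further shuffle is
   needed.  */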
31473 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31474 a two vector permutation into a single vector permutation by using
31475 an interleave operation to merge the vectors. */
31478 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31480 struct expand_vec_perm_d dremap, dfinal;
31481 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31482 unsigned contents, h1, h2, h3, h4;
31483 unsigned char remap[2 * MAX_VECT_LEN];
31487 if (d->op0 == d->op1)
31490 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31491 lanes. We can use similar techniques with the vperm2f128 instruction,
31492 but it requires slightly different logic. */
31493 if (GET_MODE_SIZE (d->vmode) != 16)
31496 /* Examine from whence the elements come. */
31498 for (i = 0; i < nelt; ++i)
31499 contents |= 1u << d->perm[i];
31501 /* Split the two input vectors into 4 halves. */
31502 h1 = (1u << nelt2) - 1;
31507 memset (remap, 0xff, sizeof (remap));
31510 /* If the elements come from the low halves, use interleave low; similarly
31511 for interleave high. If the elements come from mismatched halves, we
31512 can use shufps for V4SF/V4SI or do a DImode shuffle. */
31513 if ((contents & (h1 | h3)) == contents)
31515 for (i = 0; i < nelt2; ++i)
31518 remap[i + nelt] = i * 2 + 1;
31519 dremap.perm[i * 2] = i;
31520 dremap.perm[i * 2 + 1] = i + nelt;
31523 else if ((contents & (h2 | h4)) == contents)
31525 for (i = 0; i < nelt2; ++i)
31527 remap[i + nelt2] = i * 2;
31528 remap[i + nelt + nelt2] = i * 2 + 1;
31529 dremap.perm[i * 2] = i + nelt2;
31530 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31533 else if ((contents & (h1 | h4)) == contents)
31535 for (i = 0; i < nelt2; ++i)
31538 remap[i + nelt + nelt2] = i + nelt2;
31539 dremap.perm[i] = i;
31540 dremap.perm[i + nelt2] = i + nelt + nelt2;
31544 dremap.vmode = V2DImode;
31546 dremap.perm[0] = 0;
31547 dremap.perm[1] = 3;
31550 else if ((contents & (h2 | h3)) == contents)
31552 for (i = 0; i < nelt2; ++i)
31554 remap[i + nelt2] = i;
31555 remap[i + nelt] = i + nelt2;
31556 dremap.perm[i] = i + nelt2;
31557 dremap.perm[i + nelt2] = i + nelt;
31561 dremap.vmode = V2DImode;
31563 dremap.perm[0] = 1;
31564 dremap.perm[1] = 2;
31570 /* Use the remapping array set up above to move the elements from their
31571 swizzled locations into their final destinations. */
31573 for (i = 0; i < nelt; ++i)
31575 unsigned e = remap[d->perm[i]];
31576 gcc_assert (e < nelt);
31577 dfinal.perm[i] = e;
31579 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31580 dfinal.op1 = dfinal.op0;
31581 dremap.target = dfinal.op0;
31583 /* Test if the final remap can be done with a single insn. For V4SFmode or
31584 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31586 ok = expand_vec_perm_1 (&dfinal);
31587 seq = get_insns ();
31593 if (dremap.vmode != dfinal.vmode)
31595 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31596 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31597 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31600 ok = expand_vec_perm_1 (&dremap);
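/* Example: V4SFmode with perm = { 0, 1, 4, 5 }.  All used elements
   lie in the low halves of the two inputs (the h1 | h3 case), so
   dremap is the interleave-low { 0, 4, 1, 5 } (unpcklps); remap then
   sends 0->0, 1->2, 4->1, 5->3, leaving the single-operand
   permutation { 0, 2, 1, 3 } for dfinal, which shufps or pshufd
   handles in one insn.  */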
31607 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31608 permutation with two pshufb insns and an ior. We should have already
31609 failed all two instruction sequences. */
31612 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31614 rtx rperm[2][16], vperm, l, h, op, m128;
31615 unsigned int i, nelt, eltsz;
31617 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31619 gcc_assert (d->op0 != d->op1);
31622 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31624 /* Generate two permutation masks. If the required element is within
31625 the given vector it is shuffled into the proper lane. If the required
31626 element is in the other vector, force a zero into the lane by setting
31627 bit 7 in the permutation mask. */
31628 m128 = GEN_INT (-128);
31629 for (i = 0; i < nelt; ++i)
31631 unsigned j, e = d->perm[i];
31632 unsigned which = (e >= nelt);
31636 for (j = 0; j < eltsz; ++j)
31638 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31639 rperm[1-which][i*eltsz + j] = m128;
31643 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31644 vperm = force_reg (V16QImode, vperm);
31646 l = gen_reg_rtx (V16QImode);
31647 op = gen_lowpart (V16QImode, d->op0);
31648 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31650 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31651 vperm = force_reg (V16QImode, vperm);
31653 h = gen_reg_rtx (V16QImode);
31654 op = gen_lowpart (V16QImode, d->op1);
31655 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31657 op = gen_lowpart (V16QImode, d->target);
31658 emit_insn (gen_iorv16qi3 (op, l, h));
31663 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31664 and extract-odd permutations. */
31667 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31674 t1 = gen_reg_rtx (V4DFmode);
31675 t2 = gen_reg_rtx (V4DFmode);
31677 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31678 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31679 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31681 /* Now an unpck[lh]pd will produce the result required. */
31683 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31685 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31691 int mask = odd ? 0xdd : 0x88;
31693 t1 = gen_reg_rtx (V8SFmode);
31694 t2 = gen_reg_rtx (V8SFmode);
31695 t3 = gen_reg_rtx (V8SFmode);
31697 /* Shuffle within the 128-bit lanes to produce:
31698 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
31699 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
31702 /* Shuffle the lanes around to produce:
31703 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
31704 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
31707 /* Shuffle within the 128-bit lanes to produce:
31708 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
31709 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
31711 /* Shuffle within the 128-bit lanes to produce:
31712 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
31713 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
31715 /* Shuffle the lanes around to produce:
31716 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
31717 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
31726 /* These are always directly implementable by expand_vec_perm_1. */
31727 gcc_unreachable ();
31731 return expand_vec_perm_pshufb2 (d);
31734 /* We need 2*log2(N)-1 operations to achieve odd/even
31735 with interleave. */
31736 t1 = gen_reg_rtx (V8HImode);
31737 t2 = gen_reg_rtx (V8HImode);
31738 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31739 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31740 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31741 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31743 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31745 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31752 return expand_vec_perm_pshufb2 (d);
31755 t1 = gen_reg_rtx (V16QImode);
31756 t2 = gen_reg_rtx (V16QImode);
31757 t3 = gen_reg_rtx (V16QImode);
31758 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31759 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31760 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31761 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31762 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31763 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31765 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31767 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31773 gcc_unreachable ();
31779 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31780 extract-even and extract-odd permutations. */
31783 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31785 unsigned i, odd, nelt = d->nelt;
31788 if (odd != 0 && odd != 1)
31791 for (i = 1; i < nelt; ++i)
31792 if (d->perm[i] != 2 * i + odd)
31795 return expand_vec_perm_even_odd_1 (d, odd);
31798 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31799 permutations. We assume that expand_vec_perm_1 has already failed. */
31802 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31804 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31805 enum machine_mode vmode = d->vmode;
31806 unsigned char perm2[4];
31814 /* These are special-cased in sse.md so that we can optionally
31815 use the vbroadcast instruction. They expand to two insns
31816 if the input happens to be in a register. */
31817 gcc_unreachable ();
31823 /* These are always implementable using standard shuffle patterns. */
31824 gcc_unreachable ();
31828 /* These can be implemented via interleave. We save one insn by
31829 stopping once we have promoted to V4SImode and then use pshufd. */
31832 optab otab = vec_interleave_low_optab;
31836 otab = vec_interleave_high_optab;
31841 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31842 vmode = get_mode_wider_vector (vmode);
31843 op0 = gen_lowpart (vmode, op0);
31845 while (vmode != V4SImode);
31847 memset (perm2, elt, 4);
31848 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31853 gcc_unreachable ();
31857 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31858 broadcast permutations. */
31861 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31863 unsigned i, elt, nelt = d->nelt;
31865 if (d->op0 != d->op1)
31869 for (i = 1; i < nelt; ++i)
31870 if (d->perm[i] != elt)
31873 return expand_vec_perm_broadcast_1 (d);
31876 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31877 With all of the interface bits taken care of, perform the expansion
31878 in D and return true on success. */
31881 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31883 /* Try a single instruction expansion. */
31884 if (expand_vec_perm_1 (d))
31887 /* Try sequences of two instructions. */
31889 if (expand_vec_perm_pshuflw_pshufhw (d))
31892 if (expand_vec_perm_palignr (d))
31895 if (expand_vec_perm_interleave2 (d))
31898 if (expand_vec_perm_broadcast (d))
31901 /* Try sequences of three instructions. */
31903 if (expand_vec_perm_pshufb2 (d))
31906 /* ??? Look for narrow permutations whose element orderings would
31907 allow the promotion to a wider mode. */
31909 /* ??? Look for sequences of interleave or a wider permute that place
31910 the data into the correct lanes for a half-vector shuffle like
31911 pshuf[lh]w or vpermilps. */
31913 /* ??? Look for sequences of interleave that produce the desired results.
31914 The combinatorics of punpck[lh] get pretty ugly... */
31916 if (expand_vec_perm_even_odd (d))
31922 /* Extract the values from the vector CST into the permutation array in D.
31923 Return 0 on error, 1 if all values from the permutation come from the
31924 first vector, 2 if all values from the second vector, and 3 otherwise. */
31927 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31929 tree list = TREE_VECTOR_CST_ELTS (cst);
31930 unsigned i, nelt = d->nelt;
31933 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31935 unsigned HOST_WIDE_INT e;
31937 if (!host_integerp (TREE_VALUE (list), 1))
31939 e = tree_low_cst (TREE_VALUE (list), 1);
31943 ret |= (e < nelt ? 1 : 2);
31946 gcc_assert (list == NULL);
31948 /* If all elements are from the second vector, fold them onto the first. */
31950 for (i = 0; i < nelt; ++i)
31951 d->perm[i] -= nelt;
31957 ix86_expand_vec_perm_builtin (tree exp)
31959 struct expand_vec_perm_d d;
31960 tree arg0, arg1, arg2;
31962 arg0 = CALL_EXPR_ARG (exp, 0);
31963 arg1 = CALL_EXPR_ARG (exp, 1);
31964 arg2 = CALL_EXPR_ARG (exp, 2);
31966 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31967 d.nelt = GET_MODE_NUNITS (d.vmode);
31968 d.testing_p = false;
31969 gcc_assert (VECTOR_MODE_P (d.vmode));
31971 if (TREE_CODE (arg2) != VECTOR_CST)
31973 error_at (EXPR_LOCATION (exp),
31974 "vector permutation requires vector constant");
31978 switch (extract_vec_perm_cst (&d, arg2))
31984 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31988 if (!operand_equal_p (arg0, arg1, 0))
31990 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31991 d.op0 = force_reg (d.vmode, d.op0);
31992 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31993 d.op1 = force_reg (d.vmode, d.op1);
31997 /* The elements of PERM do not suggest that only the first operand
31998 is used, but both operands are identical. Allow easier matching
31999 of the permutation by folding the permutation into the single
32002 unsigned i, nelt = d.nelt;
32003 for (i = 0; i < nelt; ++i)
32004 if (d.perm[i] >= nelt)
32010 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
32011 d.op0 = force_reg (d.vmode, d.op0);
32016 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
32017 d.op0 = force_reg (d.vmode, d.op0);
32022 d.target = gen_reg_rtx (d.vmode);
32023 if (ix86_expand_vec_perm_builtin_1 (&d))
32026 /* For compiler generated permutations, we should never get here, because
32027 the compiler should also be checking the ok hook. But this is a
32028 builtin the user has access to, so don't abort. */
32032 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
32035 sorry ("vector permutation (%d %d %d %d)",
32036 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
32039 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
32040 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32041 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
32044 sorry ("vector permutation "
32045 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
32046 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32047 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
32048 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
32049 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
32052 gcc_unreachable ();
32055 return CONST0_RTX (d.vmode);
32058 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
32061 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
32063 struct expand_vec_perm_d d;
32067 d.vmode = TYPE_MODE (vec_type);
32068 d.nelt = GET_MODE_NUNITS (d.vmode);
32069 d.testing_p = true;
32071 /* Given sufficient ISA support we can just return true here
32072 for selected vector modes. */
32073 if (GET_MODE_SIZE (d.vmode) == 16)
32075 /* All implementable with a single vpperm insn. */
32078 /* All implementable with 2 pshufb + 1 ior. */
32081 /* All implementable with shufpd or unpck[lh]pd. */
32086 vec_mask = extract_vec_perm_cst (&d, mask);
32088 /* This hook cannot be called in response to something that the
32089 user does (unlike the builtin expander), so we should never see
32090 an error generated from the extract. */
32091 gcc_assert (vec_mask > 0 && vec_mask <= 3);
32092 one_vec = (vec_mask != 3);
32094 /* Implementable with shufps or pshufd. */
32095 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
32098 /* Otherwise we have to go through the motions and see if we can
32099 figure out how to generate the requested permutation. */
32100 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32101 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32103 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32106 ret = ix86_expand_vec_perm_builtin_1 (&d);
32113 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32115 struct expand_vec_perm_d d;
32121 d.vmode = GET_MODE (targ);
32122 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32123 d.testing_p = false;
32125 for (i = 0; i < nelt; ++i)
32126 d.perm[i] = i * 2 + odd;
32128 /* We'll either be able to implement the permutation directly... */
32129 if (expand_vec_perm_1 (&d))
32132 /* ... or we use the special-case patterns. */
32133 expand_vec_perm_even_odd_1 (&d, odd);
32136 /* This function returns the calling-ABI-specific va_list type node.
32137 It returns the va_list type specific to FNDECL. */
32140 ix86_fn_abi_va_list (tree fndecl)
32143 return va_list_type_node;
32144 gcc_assert (fndecl != NULL_TREE);
32146 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32147 return ms_va_list_type_node;
32149 return sysv_va_list_type_node;
32152 /* Returns the canonical va_list type specified by TYPE. If there
32153 is no valid TYPE provided, it returns NULL_TREE. */
32156 ix86_canonical_va_list_type (tree type)
32160 /* Resolve references and pointers to va_list type. */
32161 if (TREE_CODE (type) == MEM_REF)
32162 type = TREE_TYPE (type);
32163 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32164 type = TREE_TYPE (type);
32165 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32166 type = TREE_TYPE (type);
32170 wtype = va_list_type_node;
32171 gcc_assert (wtype != NULL_TREE);
32173 if (TREE_CODE (wtype) == ARRAY_TYPE)
32175 /* If va_list is an array type, the argument may have decayed
32176 to a pointer type, e.g. by being passed to another function.
32177 In that case, unwrap both types so that we can compare the
32178 underlying records. */
32179 if (TREE_CODE (htype) == ARRAY_TYPE
32180 || POINTER_TYPE_P (htype))
32182 wtype = TREE_TYPE (wtype);
32183 htype = TREE_TYPE (htype);
32186 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32187 return va_list_type_node;
32188 wtype = sysv_va_list_type_node;
32189 gcc_assert (wtype != NULL_TREE);
32191 if (TREE_CODE (wtype) == ARRAY_TYPE)
32193 /* If va_list is an array type, the argument may have decayed
32194 to a pointer type, e.g. by being passed to another function.
32195 In that case, unwrap both types so that we can compare the
32196 underlying records. */
32197 if (TREE_CODE (htype) == ARRAY_TYPE
32198 || POINTER_TYPE_P (htype))
32200 wtype = TREE_TYPE (wtype);
32201 htype = TREE_TYPE (htype);
32204 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32205 return sysv_va_list_type_node;
32206 wtype = ms_va_list_type_node;
32207 gcc_assert (wtype != NULL_TREE);
32209 if (TREE_CODE (wtype) == ARRAY_TYPE)
32211 /* If va_list is an array type, the argument may have decayed
32212 to a pointer type, e.g. by being passed to another function.
32213 In that case, unwrap both types so that we can compare the
32214 underlying records. */
32215 if (TREE_CODE (htype) == ARRAY_TYPE
32216 || POINTER_TYPE_P (htype))
32218 wtype = TREE_TYPE (wtype);
32219 htype = TREE_TYPE (htype);
32222 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32223 return ms_va_list_type_node;
32226 return std_canonical_va_list_type (type);
32229 /* Iterate through the target-specific builtin types for va_list.
32230 IDX denotes the iterator, *PTREE is set to the result type of
32231 the va_list builtin, and *PNAME to its internal type.
32232 Returns zero if there is no element for this index, otherwise
32233 IDX should be increased upon the next call.
32234 Note, do not iterate a base builtin's name like __builtin_va_list.
32235 Used from c_common_nodes_and_builtins. */
32238 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32248 *ptree = ms_va_list_type_node;
32249 *pname = "__builtin_ms_va_list";
32253 *ptree = sysv_va_list_type_node;
32254 *pname = "__builtin_sysv_va_list";
32262 #undef TARGET_SCHED_DISPATCH
32263 #define TARGET_SCHED_DISPATCH has_dispatch
32264 #undef TARGET_SCHED_DISPATCH_DO
32265 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32267 /* The size of the dispatch window is the total number of bytes of
32268 object code allowed in a window. */
32269 #define DISPATCH_WINDOW_SIZE 16
32271 /* Number of dispatch windows considered for scheduling. */
32272 #define MAX_DISPATCH_WINDOWS 3
32274 /* Maximum number of instructions in a window. */
32277 /* Maximum number of immediate operands in a window. */
32280 /* Maximum number of immediate bits allowed in a window. */
32281 #define MAX_IMM_SIZE 128
32283 /* Maximum number of 32 bit immediates allowed in a window. */
32284 #define MAX_IMM_32 4
32286 /* Maximum number of 64 bit immediates allowed in a window. */
32287 #define MAX_IMM_64 2
32289 /* Maximum total of loads or prefetches allowed in a window. */
32292 /* Maximum total of stores allowed in a window. */
32293 #define MAX_STORE 1
32299 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
32300 enum dispatch_group {
32315 /* Number of allowable groups in a dispatch window. It is an array
32316 indexed by dispatch_group enum. 100 is used as a big number,
32317 because the number of these kinds of operations does not have any
32318 effect in dispatch window, but we need them for other reasons in
32320 static unsigned int num_allowable_groups[disp_last] = {
32321 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32324 char group_name[disp_last + 1][16] = {
32325 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32326 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32327 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
32330 /* Instruction path. */
32333 path_single, /* Single micro op. */
32334 path_double, /* Double micro op. */
32335 path_multi, /* Instructions with more than 2 micro ops. */
32339 /* sched_insn_info defines a window to the instructions scheduled in
32340 the basic block. It contains a pointer to the insn_info table and
32341 the instruction scheduled.
32343 Windows are allocated for each basic block and are linked
32345 typedef struct sched_insn_info_s {
32347 enum dispatch_group group;
32348 enum insn_path path;
32353 /* Linked list of dispatch windows. This is a two-way list of
32354 dispatch windows of a basic block. It contains information about
32355 the number of uops in the window and the total number of
32356 instructions and of bytes in the object code for this dispatch
32358 typedef struct dispatch_windows_s {
32359 int num_insn; /* Number of insns in the window. */
32360 int num_uops; /* Number of uops in the window. */
32361 int window_size; /* Number of bytes in the window. */
32362 int window_num; /* Window number, either 0 or 1. */
32363 int num_imm; /* Number of immediates in an insn. */
32364 int num_imm_32; /* Number of 32 bit immediates in an insn. */
32365 int num_imm_64; /* Number of 64 bit immediates in an insn. */
32366 int imm_size; /* Total immediates in the window. */
32367 int num_loads; /* Total memory loads in the window. */
32368 int num_stores; /* Total memory stores in the window. */
32369 int violation; /* Violation exists in window. */
32370 sched_insn_info *window; /* Pointer to the window. */
32371 struct dispatch_windows_s *next;
32372 struct dispatch_windows_s *prev;
32373 } dispatch_windows;
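/* A sketch of the resulting layout: only two window records are ever
   live at a time, chained as

   dispatch_window_list (window 0) <-> dispatch_window_list1 (window 1)

   and together they model the 48 bytes the hardware can dispatch. */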
32375 /* Immediate values used in an insn. */
32376 typedef struct imm_info_s
32377 {
32378 int imm;
32379 int imm32;
32380 int imm64;
32381 } imm_info;
32383 static dispatch_windows *dispatch_window_list;
32384 static dispatch_windows *dispatch_window_list1;
32386 /* Get dispatch group of insn. */
32388 static enum dispatch_group
32389 get_mem_group (rtx insn)
32390 {
32391 enum attr_memory memory;
32393 if (INSN_CODE (insn) < 0)
32394 return disp_no_group;
32395 memory = get_attr_memory (insn);
32396 if (memory == MEMORY_STORE)
32397 return disp_store;
32399 if (memory == MEMORY_LOAD)
32400 return disp_load;
32402 if (memory == MEMORY_BOTH)
32403 return disp_load_store;
32405 return disp_no_group;
32406 }
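/* Illustration with hypothetical insns (assuming the usual "memory"
   attribute values): "movl (%eax), %ebx" is disp_load, "movl %ebx,
   (%eax)" is disp_store, and a read-modify-write such as
   "addl %ebx, (%eax)" is disp_load_store. */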
32408 /* Return true if insn is a compare instruction. */
32410 static bool
32411 is_cmp (rtx insn)
32412 {
32413 enum attr_type type;
32415 type = get_attr_type (insn);
32416 return (type == TYPE_TEST
32417 || type == TYPE_ICMP
32418 || type == TYPE_FCMP
32419 || GET_CODE (PATTERN (insn)) == COMPARE);
32420 }
32422 /* Return true if a dispatch violation was encountered. */
32424 static bool
32425 dispatch_violation (void)
32426 {
32427 if (dispatch_window_list->next)
32428 return dispatch_window_list->next->violation;
32429 return dispatch_window_list->violation;
32430 }
32432 /* Return true if insn is a branch instruction. */
32434 static bool
32435 is_branch (rtx insn)
32436 {
32437 return (CALL_P (insn) || JUMP_P (insn));
32438 }
32440 /* Return true if insn is a prefetch instruction. */
32442 static bool
32443 is_prefetch (rtx insn)
32444 {
32445 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
32446 }
32448 /* This function initializes a dispatch window and the list container holding a
32449 pointer to the window. */
32451 static void
32452 init_window (int window_num)
32453 {
32454 int i;
32455 dispatch_windows *new_list;
32457 if (window_num == 0)
32458 new_list = dispatch_window_list;
32459 else
32460 new_list = dispatch_window_list1;
32462 new_list->num_insn = 0;
32463 new_list->num_uops = 0;
32464 new_list->window_size = 0;
32465 new_list->next = NULL;
32466 new_list->prev = NULL;
32467 new_list->window_num = window_num;
32468 new_list->num_imm = 0;
32469 new_list->num_imm_32 = 0;
32470 new_list->num_imm_64 = 0;
32471 new_list->imm_size = 0;
32472 new_list->num_loads = 0;
32473 new_list->num_stores = 0;
32474 new_list->violation = false;
32476 for (i = 0; i < MAX_INSN; i++)
32477 {
32478 new_list->window[i].insn = NULL;
32479 new_list->window[i].group = disp_no_group;
32480 new_list->window[i].path = no_path;
32481 new_list->window[i].byte_len = 0;
32482 new_list->window[i].imm_bytes = 0;
32483 }
32485 }
32487 /* This function allocates and initializes a dispatch window and the
32488 list container holding a pointer to the window. */
32490 static dispatch_windows *
32491 allocate_window (void)
32492 {
32493 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32494 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32496 return new_list;
32497 }
32499 /* This routine initializes the dispatch scheduling information. It
32500 initiates building dispatch scheduler tables and constructs the
32501 first dispatch window. */
32503 static void
32504 init_dispatch_sched (void)
32505 {
32506 /* Allocate a dispatch list and a window. */
32507 dispatch_window_list = allocate_window ();
32508 dispatch_window_list1 = allocate_window ();
32510 init_window (0);
32511 init_window (1);
32512 }
32513 /* This function returns true if a branch is detected. End of a basic block
32514 does not have to be a branch, but here we assume only branches end a
32515 basic block. */
32517 static bool
32518 is_end_basic_block (enum dispatch_group group)
32519 {
32520 return group == disp_branch;
32521 }
32523 /* This function is called when the end of a window's processing is reached. */
32525 static void
32526 process_end_window (void)
32527 {
32528 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32529 if (dispatch_window_list->next)
32530 {
32531 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32532 gcc_assert (dispatch_window_list->window_size
32533 + dispatch_window_list1->window_size <= 48);
32534 init_window (1);
32535 }
32536 init_window (0);
32537 }
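/* Worked example: if window 0 ends holding 16 bytes and window 1 holds
   14, the pair carries 30 bytes, within the 48-byte limit asserted
   above; both windows are then reset for the next group. */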
32539 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32540 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32541 for 48 bytes of instructions. Note that these windows are not dispatch
32542 windows of size DISPATCH_WINDOW_SIZE. */
32544 static dispatch_windows *
32545 allocate_next_window (int window_num)
32547 if (window_num == 0)
32548 {
32549 if (dispatch_window_list->next)
32550 init_window (1);
32551 init_window (0);
32552 return dispatch_window_list;
32553 }
32555 dispatch_window_list->next = dispatch_window_list1;
32556 dispatch_window_list1->prev = dispatch_window_list;
32558 return dispatch_window_list1;
32559 }
32561 /* Increment the number of immediate operands of an instruction. */
32563 static int
32564 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32565 {
32566 if (!*in_rtx)
32567 return 0;
32569 switch ( GET_CODE (*in_rtx))
32570 {
32571 case CONST:
32572 case SYMBOL_REF:
32573 case CONST_INT:
32574 (imm_values->imm)++;
32575 if (x86_64_immediate_operand (*in_rtx, SImode))
32576 (imm_values->imm32)++;
32577 else
32578 (imm_values->imm64)++;
32579 break;
32581 case CONST_DOUBLE:
32582 (imm_values->imm)++;
32583 (imm_values->imm64)++;
32584 break;
32586 case CODE_LABEL:
32587 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32588 {
32589 (imm_values->imm)++;
32590 (imm_values->imm32)++;
32591 }
32592 break;
32594 default:
32595 break;
32596 }
32598 return 0;
32599 }
32601 /* Compute number of immediate operands of an instruction. */
32603 static void
32604 find_constant (rtx in_rtx, imm_info *imm_values)
32605 {
32606 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32607 (rtx_function) find_constant_1, (void *) imm_values);
32608 }
32610 /* Return total size of immediate operands of an instruction along with number
32611 of corresponding immediate-operands. It initializes its parameters to zero
32612 before calling FIND_CONSTANT.
32613 INSN is the input instruction. IMM is the total of immediates.
32614 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
32615 bit immediates. */
32617 static int
32618 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32619 {
32620 imm_info imm_values = {0, 0, 0};
32622 find_constant (insn, &imm_values);
32623 *imm = imm_values.imm;
32624 *imm32 = imm_values.imm32;
32625 *imm64 = imm_values.imm64;
32626 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
32627 }
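/* Worked example (hypothetical insn): "movq $0x123456789abc, %rax"
   carries one 64-bit immediate, so this returns 8 with *imm == 1,
   *imm32 == 0 and *imm64 == 1; an insn with one 32-bit and one 64-bit
   immediate would return 4 + 8 == 12. */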
32629 /* This function indicates if an operand of an instruction is an
32630 immediate. */
32632 static int
32633 has_immediate (rtx insn)
32634 {
32635 int num_imm_operand;
32636 int num_imm32_operand;
32637 int num_imm64_operand;
32640 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32641 &num_imm64_operand);
32642 }
32645 /* Return single or double path for instructions. */
32647 static enum insn_path
32648 get_insn_path (rtx insn)
32649 {
32650 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32652 if ((int)path == 0)
32653 return path_single;
32655 if ((int)path == 1)
32656 return path_double;
32658 return path_multi;
32659 }
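/* Note (an inference from the casts above, not stated in the original):
   this relies on the amdfam10_decode attribute encoding its first value
   as 0 and its second as 1; any other decode value is treated as a
   multi micro-op path. */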
32661 /* Return insn dispatch group. */
32663 static enum dispatch_group
32664 get_insn_group (rtx insn)
32665 {
32666 enum dispatch_group group = get_mem_group (insn);
32667 if (group)
32668 return group;
32670 if (is_branch (insn))
32671 return disp_branch;
32673 if (is_cmp (insn))
32674 return disp_cmp;
32676 if (has_immediate (insn))
32677 return disp_imm;
32679 if (is_prefetch (insn))
32680 return disp_prefetch;
32682 return disp_no_group;
32683 }
32685 /* Count number of GROUP restricted instructions in a dispatch
32686 window WINDOW_LIST. */
32688 static int
32689 count_num_restricted (rtx insn, dispatch_windows *window_list)
32690 {
32691 enum dispatch_group group = get_insn_group (insn);
32692 int imm_size;
32693 int num_imm_operand;
32694 int num_imm32_operand;
32695 int num_imm64_operand;
32697 if (group == disp_no_group)
32698 return 0;
32700 if (group == disp_imm)
32701 {
32702 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32703 &num_imm64_operand);
32704 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32705 || num_imm_operand + window_list->num_imm > MAX_IMM
32706 || (num_imm32_operand > 0
32707 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32708 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32709 || (num_imm64_operand > 0
32710 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32711 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32712 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32713 && num_imm64_operand > 0
32714 && ((window_list->num_imm_64 > 0
32715 && window_list->num_insn >= 2)
32716 || window_list->num_insn >= 3)))
32717 return BIG;
32719 return 1;
32720 }
32722 if ((group == disp_load_store
32723 && (window_list->num_loads >= MAX_LOAD
32724 || window_list->num_stores >= MAX_STORE))
32725 || ((group == disp_load
32726 || group == disp_prefetch)
32727 && window_list->num_loads >= MAX_LOAD)
32728 || (group == disp_store
32729 && window_list->num_stores >= MAX_STORE))
32730 return BIG;
32732 return 1;
32733 }
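/* Worked example: with MAX_STORE == 1, checking a store-group insn
   against a window that already contains one store returns BIG, which
   exceeds num_allowable_groups[disp_store] == 1 in the caller and keeps
   the insn out of this window. */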
32735 /* This function returns true if insn satisfies dispatch rules on the
32736 last window scheduled. */
32738 static bool
32739 fits_dispatch_window (rtx insn)
32740 {
32741 dispatch_windows *window_list = dispatch_window_list;
32742 dispatch_windows *window_list_next = dispatch_window_list->next;
32743 unsigned int num_restrict;
32744 enum dispatch_group group = get_insn_group (insn);
32745 enum insn_path path = get_insn_path (insn);
32746 int sum;
32748 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32749 instructions should be given the lowest priority in the
32750 scheduling process in Haifa scheduler to make sure they will be
32751 scheduled in the same dispatch window as the reference to them. */
32752 if (group == disp_jcc || group == disp_cmp)
32753 return false;
32755 /* Check nonrestricted. */
32756 if (group == disp_no_group || group == disp_branch)
32757 return true;
32759 /* Get last dispatch window. */
32760 if (window_list_next)
32761 window_list = window_list_next;
32763 if (window_list->window_num == 1)
32764 {
32765 sum = window_list->prev->window_size + window_list->window_size;
32767 if (sum == 32
32768 || (min_insn_size (insn) + sum) >= 48)
32769 /* Window 1 is full. Go for next window. */
32770 return true;
32771 }
32773 num_restrict = count_num_restricted (insn, window_list);
32775 if (num_restrict > num_allowable_groups[group])
32776 return false;
32778 /* See if it fits in the first window. */
32779 if (window_list->window_num == 0)
32780 {
32781 /* The first window should have only single and double path
32782 uops. */
32783 if (path == path_double
32784 && (window_list->num_uops + 2) > MAX_INSN)
32785 return false;
32786 else if (path != path_single)
32787 return false;
32788 }
32790 return true;
32791 }
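/* Worked example: with MAX_INSN == 4 (per the define above), a
   double-path insn tested against window 0 that already counts 3 uops
   fails the (num_uops + 2) > MAX_INSN check (3 + 2 > 4), so the insn
   must wait for the next window. */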
32792 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32793 dispatch window WINDOW_LIST. */
32795 static void
32796 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
32797 {
32798 int byte_len = min_insn_size (insn);
32799 int num_insn = window_list->num_insn;
32800 int imm_size;
32801 sched_insn_info *window = window_list->window;
32802 enum dispatch_group group = get_insn_group (insn);
32803 enum insn_path path = get_insn_path (insn);
32804 int num_imm_operand;
32805 int num_imm32_operand;
32806 int num_imm64_operand;
32808 if (!window_list->violation && group != disp_cmp
32809 && !fits_dispatch_window (insn))
32810 window_list->violation = true;
32812 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32813 &num_imm64_operand);
32815 /* Initialize window with new instruction. */
32816 window[num_insn].insn = insn;
32817 window[num_insn].byte_len = byte_len;
32818 window[num_insn].group = group;
32819 window[num_insn].path = path;
32820 window[num_insn].imm_bytes = imm_size;
32822 window_list->window_size += byte_len;
32823 window_list->num_insn = num_insn + 1;
32824 window_list->num_uops = window_list->num_uops + num_uops;
32825 window_list->imm_size += imm_size;
32826 window_list->num_imm += num_imm_operand;
32827 window_list->num_imm_32 += num_imm32_operand;
32828 window_list->num_imm_64 += num_imm64_operand;
32830 if (group == disp_store)
32831 window_list->num_stores += 1;
32832 else if (group == disp_load
32833 || group == disp_prefetch)
32834 window_list->num_loads += 1;
32835 else if (group == disp_load_store)
32836 {
32837 window_list->num_stores += 1;
32838 window_list->num_loads += 1;
32839 }
32840 }
32842 /* Adds a scheduled instruction, INSN, to the current dispatch window.
32843 If the total bytes of instructions or the number of instructions in
32844 the window exceed the allowed maximum, it allocates a new window. */
32846 static void
32847 add_to_dispatch_window (rtx insn)
32848 {
32849 int byte_len;
32850 dispatch_windows *window_list;
32851 dispatch_windows *next_list;
32852 dispatch_windows *window0_list;
32853 enum insn_path path;
32854 enum dispatch_group insn_group;
32855 bool insn_fits;
32856 int num_insn;
32857 int num_uops;
32858 int window_num;
32859 int insn_num_uops;
32860 int sum;
32862 if (INSN_CODE (insn) < 0)
32863 return;
32865 byte_len = min_insn_size (insn);
32866 window_list = dispatch_window_list;
32867 next_list = window_list->next;
32868 path = get_insn_path (insn);
32869 insn_group = get_insn_group (insn);
32871 /* Get the last dispatch window. */
32872 if (next_list)
32873 window_list = dispatch_window_list->next;
32875 if (path == path_single)
32876 insn_num_uops = 1;
32877 else if (path == path_double)
32878 insn_num_uops = 2;
32879 else
32880 insn_num_uops = (int) path;
32882 /* If current window is full, get a new window.
32883 Window number zero is full if MAX_INSN uops are scheduled in it.
32884 Window number one is full if window zero's bytes plus window
32885 one's bytes is 32, or if adding the bytes of the new instruction
32886 to the total makes it greater than 48, or if it already has MAX_INSN
32887 instructions in it. */
32888 num_insn = window_list->num_insn;
32889 num_uops = window_list->num_uops;
32890 window_num = window_list->window_num;
32891 insn_fits = fits_dispatch_window (insn);
32893 if (num_insn >= MAX_INSN
32894 || num_uops + insn_num_uops > MAX_INSN
32895 || !(insn_fits))
32896 {
32897 window_num = ~window_num & 1; /* Toggle between window 0 and window 1. */
32898 window_list = allocate_next_window (window_num);
32899 }
32901 if (window_num == 0)
32902 {
32903 add_insn_window (insn, window_list, insn_num_uops);
32904 if (window_list->num_insn >= MAX_INSN
32905 && insn_group == disp_branch)
32906 {
32907 process_end_window ();
32908 return;
32909 }
32910 }
32911 else if (window_num == 1)
32912 {
32913 window0_list = window_list->prev;
32914 sum = window0_list->window_size + window_list->window_size;
32915 if (sum == 32
32916 || (byte_len + sum) >= 48)
32917 {
32918 process_end_window ();
32919 window_list = dispatch_window_list;
32920 }
32922 add_insn_window (insn, window_list, insn_num_uops);
32923 }
32924 else
32925 gcc_unreachable ();
32927 if (is_end_basic_block (insn_group))
32928 {
32929 /* End of basic block is reached; do end-basic-block process. */
32930 process_end_window ();
32931 }
32932 }
32935 /* Print the dispatch window, WINDOW_NUM, to FILE. */
32937 DEBUG_FUNCTION static void
32938 debug_dispatch_window_file (FILE *file, int window_num)
32939 {
32940 dispatch_windows *list;
32941 int i;
32943 if (window_num == 0)
32944 list = dispatch_window_list;
32945 else
32946 list = dispatch_window_list1;
32948 fprintf (file, "Window #%d:\n", list->window_num);
32949 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
32950 list->num_insn, list->num_uops, list->window_size);
32951 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32952 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
32954 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
32956 fprintf (file, " insn info:\n");
32958 for (i = 0; i < MAX_INSN; i++)
32959 {
32960 if (!list->window[i].insn)
32961 break;
32962 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
32963 i, group_name[list->window[i].group],
32964 i, (void *)list->window[i].insn,
32965 i, list->window[i].path,
32966 i, list->window[i].byte_len,
32967 i, list->window[i].imm_bytes);
32968 }
32969 }
32971 /* Print to stdout a dispatch window. */
32973 DEBUG_FUNCTION void
32974 debug_dispatch_window (int window_num)
32975 {
32976 debug_dispatch_window_file (stdout, window_num);
32977 }
32979 /* Print INSN dispatch information to FILE. */
32981 DEBUG_FUNCTION static void
32982 debug_insn_dispatch_info_file (FILE *file, rtx insn)
32983 {
32984 int byte_len;
32985 enum insn_path path;
32986 enum dispatch_group group;
32987 int imm_size;
32988 int num_imm_operand;
32989 int num_imm32_operand;
32990 int num_imm64_operand;
32992 if (INSN_CODE (insn) < 0)
32993 return;
32995 byte_len = min_insn_size (insn);
32996 path = get_insn_path (insn);
32997 group = get_insn_group (insn);
32998 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32999 &num_imm64_operand);
33001 fprintf (file, " insn info:\n");
33002 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
33003 group_name[group], path, byte_len);
33004 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
33005 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
33006 }
33008 /* Print to STDOUT the status of the ready list with respect to
33009 dispatch windows. */
33011 DEBUG_FUNCTION void
33012 debug_ready_dispatch (void)
33013 {
33014 int i;
33015 int no_ready = number_in_ready ();
33017 fprintf (stdout, "Number of ready: %d\n", no_ready);
33019 for (i = 0; i < no_ready; i++)
33020 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
33021 }
33023 /* This routine is the driver of the dispatch scheduler. */
33025 static void
33026 do_dispatch (rtx insn, int mode)
33027 {
33028 if (mode == DISPATCH_INIT)
33029 init_dispatch_sched ();
33030 else if (mode == ADD_TO_DISPATCH_WINDOW)
33031 add_to_dispatch_window (insn);
33032 }
33034 /* Answer the dispatch-scheduling query ACTION for INSN; returns false
33035 whenever dispatch scheduling is not in effect. */
33036 static bool
33037 has_dispatch (rtx insn, int action)
33038 {
33039 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
33040 switch (action)
33041 {
33042 default:
33043 return false;
33045 case IS_DISPATCH_ON:
33046 return true;
33049 case IS_CMP:
33050 return is_cmp (insn);
33052 case DISPATCH_VIOLATION:
33053 return dispatch_violation ();
33055 case FITS_DISPATCH_WINDOW:
33056 return fits_dispatch_window (insn);
33057 }
33059 return false;
33060 }
33062 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
33063 place emms and femms instructions. */
33065 static enum machine_mode
33066 ix86_preferred_simd_mode (enum machine_mode mode)
33067 {
33068 /* Disable double precision vectorizer if needed. */
33069 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
33070 return word_mode;
33072 if (!TARGET_AVX && !TARGET_SSE)
33073 return word_mode;
33075 switch (mode)
33076 {
33077 case SFmode:
33078 return TARGET_AVX ? V8SFmode : V4SFmode;
33079 case DFmode:
33080 return TARGET_AVX ? V4DFmode : V2DFmode;
33081 case DImode:
33082 return V2DImode;
33083 case SImode:
33084 return V4SImode;
33085 case HImode:
33086 return V8HImode;
33087 case QImode:
33088 return V16QImode;
33090 default:;
33091 }
33093 return word_mode;
33094 }
33096 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
33097 vectors. */
33099 static unsigned int
33100 ix86_autovectorize_vector_sizes (void)
33101 {
33102 return TARGET_AVX ? 32 | 16 : 0;
33103 }
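/* The return value is a bitmask of vector sizes in bytes: with AVX it
   is 32 | 16 == 48, meaning "try 32-byte vectors first, then retry with
   16-byte vectors"; returning 0 leaves the vectorizer with just the
   preferred SIMD mode chosen above. */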
33105 /* Initialize the GCC target structure. */
33106 #undef TARGET_RETURN_IN_MEMORY
33107 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
33109 #undef TARGET_LEGITIMIZE_ADDRESS
33110 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
33112 #undef TARGET_ATTRIBUTE_TABLE
33113 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
33114 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33115 # undef TARGET_MERGE_DECL_ATTRIBUTES
33116 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
33117 #endif
33119 #undef TARGET_COMP_TYPE_ATTRIBUTES
33120 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
33122 #undef TARGET_INIT_BUILTINS
33123 #define TARGET_INIT_BUILTINS ix86_init_builtins
33124 #undef TARGET_BUILTIN_DECL
33125 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33126 #undef TARGET_EXPAND_BUILTIN
33127 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33129 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33130 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33131 ix86_builtin_vectorized_function
33133 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33134 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33136 #undef TARGET_BUILTIN_RECIPROCAL
33137 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33139 #undef TARGET_ASM_FUNCTION_EPILOGUE
33140 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33142 #undef TARGET_ENCODE_SECTION_INFO
33143 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33144 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33145 #else
33146 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
33147 #endif
33149 #undef TARGET_ASM_OPEN_PAREN
33150 #define TARGET_ASM_OPEN_PAREN ""
33151 #undef TARGET_ASM_CLOSE_PAREN
33152 #define TARGET_ASM_CLOSE_PAREN ""
33154 #undef TARGET_ASM_BYTE_OP
33155 #define TARGET_ASM_BYTE_OP ASM_BYTE
33157 #undef TARGET_ASM_ALIGNED_HI_OP
33158 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33159 #undef TARGET_ASM_ALIGNED_SI_OP
33160 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
33162 #undef TARGET_ASM_ALIGNED_DI_OP
33163 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
33166 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33167 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33169 #undef TARGET_ASM_UNALIGNED_HI_OP
33170 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33171 #undef TARGET_ASM_UNALIGNED_SI_OP
33172 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33173 #undef TARGET_ASM_UNALIGNED_DI_OP
33174 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33176 #undef TARGET_PRINT_OPERAND
33177 #define TARGET_PRINT_OPERAND ix86_print_operand
33178 #undef TARGET_PRINT_OPERAND_ADDRESS
33179 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33180 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33181 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33182 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33183 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33185 #undef TARGET_SCHED_ADJUST_COST
33186 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33187 #undef TARGET_SCHED_ISSUE_RATE
33188 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33189 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33190 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33191 ia32_multipass_dfa_lookahead
33193 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33194 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
33197 #undef TARGET_HAVE_TLS
33198 #define TARGET_HAVE_TLS true
33200 #undef TARGET_CANNOT_FORCE_CONST_MEM
33201 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33202 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33203 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33205 #undef TARGET_DELEGITIMIZE_ADDRESS
33206 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33208 #undef TARGET_MS_BITFIELD_LAYOUT_P
33209 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
33211 #if TARGET_MACHO
33212 #undef TARGET_BINDS_LOCAL_P
33213 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
33214 #endif
33215 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33216 #undef TARGET_BINDS_LOCAL_P
33217 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
33218 #endif
33220 #undef TARGET_ASM_OUTPUT_MI_THUNK
33221 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33222 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33223 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33225 #undef TARGET_ASM_FILE_START
33226 #define TARGET_ASM_FILE_START x86_file_start
33228 #undef TARGET_DEFAULT_TARGET_FLAGS
33229 #define TARGET_DEFAULT_TARGET_FLAGS \
33230 (TARGET_DEFAULT \
33231 | TARGET_SUBTARGET_DEFAULT \
33232 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
33233 | MASK_FUSED_MADD)
33235 #undef TARGET_HANDLE_OPTION
33236 #define TARGET_HANDLE_OPTION ix86_handle_option
33238 #undef TARGET_OPTION_OVERRIDE
33239 #define TARGET_OPTION_OVERRIDE ix86_option_override
33240 #undef TARGET_OPTION_OPTIMIZATION
33241 #define TARGET_OPTION_OPTIMIZATION ix86_option_optimization
33242 #undef TARGET_OPTION_INIT_STRUCT
33243 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
33245 #undef TARGET_REGISTER_MOVE_COST
33246 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33247 #undef TARGET_MEMORY_MOVE_COST
33248 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33249 #undef TARGET_RTX_COSTS
33250 #define TARGET_RTX_COSTS ix86_rtx_costs
33251 #undef TARGET_ADDRESS_COST
33252 #define TARGET_ADDRESS_COST ix86_address_cost
33254 #undef TARGET_FIXED_CONDITION_CODE_REGS
33255 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33256 #undef TARGET_CC_MODES_COMPATIBLE
33257 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33259 #undef TARGET_MACHINE_DEPENDENT_REORG
33260 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33262 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33263 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33265 #undef TARGET_BUILD_BUILTIN_VA_LIST
33266 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33268 #undef TARGET_ENUM_VA_LIST_P
33269 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33271 #undef TARGET_FN_ABI_VA_LIST
33272 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33274 #undef TARGET_CANONICAL_VA_LIST_TYPE
33275 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33277 #undef TARGET_EXPAND_BUILTIN_VA_START
33278 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33280 #undef TARGET_MD_ASM_CLOBBERS
33281 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33283 #undef TARGET_PROMOTE_PROTOTYPES
33284 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33285 #undef TARGET_STRUCT_VALUE_RTX
33286 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33287 #undef TARGET_SETUP_INCOMING_VARARGS
33288 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33289 #undef TARGET_MUST_PASS_IN_STACK
33290 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33291 #undef TARGET_FUNCTION_ARG_ADVANCE
33292 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33293 #undef TARGET_FUNCTION_ARG
33294 #define TARGET_FUNCTION_ARG ix86_function_arg
33295 #undef TARGET_PASS_BY_REFERENCE
33296 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33297 #undef TARGET_INTERNAL_ARG_POINTER
33298 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33299 #undef TARGET_UPDATE_STACK_BOUNDARY
33300 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33301 #undef TARGET_GET_DRAP_RTX
33302 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33303 #undef TARGET_STRICT_ARGUMENT_NAMING
33304 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33305 #undef TARGET_STATIC_CHAIN
33306 #define TARGET_STATIC_CHAIN ix86_static_chain
33307 #undef TARGET_TRAMPOLINE_INIT
33308 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33309 #undef TARGET_RETURN_POPS_ARGS
33310 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33312 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33313 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33315 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33316 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33318 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33319 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33321 #undef TARGET_C_MODE_FOR_SUFFIX
33322 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
33324 #ifdef HAVE_AS_TLS
33325 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
33326 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
33327 #endif
33329 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33330 #undef TARGET_INSERT_ATTRIBUTES
33331 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
33332 #endif
33334 #undef TARGET_MANGLE_TYPE
33335 #define TARGET_MANGLE_TYPE ix86_mangle_type
33337 #undef TARGET_STACK_PROTECT_FAIL
33338 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33340 #undef TARGET_SUPPORTS_SPLIT_STACK
33341 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33343 #undef TARGET_FUNCTION_VALUE
33344 #define TARGET_FUNCTION_VALUE ix86_function_value
33346 #undef TARGET_FUNCTION_VALUE_REGNO_P
33347 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33349 #undef TARGET_SECONDARY_RELOAD
33350 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33352 #undef TARGET_PREFERRED_RELOAD_CLASS
33353 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
33354 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
33355 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
33356 #undef TARGET_CLASS_LIKELY_SPILLED_P
33357 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33359 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33360 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33361 ix86_builtin_vectorization_cost
33362 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33363 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33364 ix86_vectorize_builtin_vec_perm
33365 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33366 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33367 ix86_vectorize_builtin_vec_perm_ok
33368 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
33369 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
33370 ix86_preferred_simd_mode
33371 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
33372 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
33373 ix86_autovectorize_vector_sizes
33375 #undef TARGET_SET_CURRENT_FUNCTION
33376 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33378 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33379 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33381 #undef TARGET_OPTION_SAVE
33382 #define TARGET_OPTION_SAVE ix86_function_specific_save
33384 #undef TARGET_OPTION_RESTORE
33385 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33387 #undef TARGET_OPTION_PRINT
33388 #define TARGET_OPTION_PRINT ix86_function_specific_print
33390 #undef TARGET_CAN_INLINE_P
33391 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33393 #undef TARGET_EXPAND_TO_RTL_HOOK
33394 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33396 #undef TARGET_LEGITIMATE_ADDRESS_P
33397 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33399 #undef TARGET_IRA_COVER_CLASSES
33400 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33402 #undef TARGET_FRAME_POINTER_REQUIRED
33403 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33405 #undef TARGET_CAN_ELIMINATE
33406 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33408 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33409 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33411 #undef TARGET_ASM_CODE_END
33412 #define TARGET_ASM_CODE_END ix86_code_end
33414 struct gcc_target targetm = TARGET_INITIALIZER;
33416 #include "gt-i386.h"