/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
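
/* Editorial sketch (not part of the original file): MODE_INDEX selects the
   slot of the five-element {QI, HI, SI, DI, other} cost arrays used by the
   processor_costs tables below.  Assuming the mult_init field named by the
   comments in those tables, the startup cost of an SImode multiply would be
   read roughly as:

     int cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   where MODE_INDEX (SImode) expands to 2, and any mode outside QI..DI maps
   to the final "other" slot, index 4.  */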

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
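
/* Editorial note (a sketch relying only on the assumption stated above):
   with COSTS_N_INSNS (N) == (N) * 4, COSTS_N_BYTES puts encoding sizes on
   the same scale as instruction counts, e.g.

     COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1)

   so in ix86_size_cost below a 2-byte add is exactly as expensive as one
   generic instruction, making costs track bytes rather than latency.  */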

struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
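
/* Editorial sketch (a reading of the stringop_algs initializers above; the
   struct itself is declared in i386.h): the first member is the algorithm
   used when the block size is unknown at compile time, and each following
   {max, alg} pair requests ALG for known sizes up to MAX bytes, with -1 as
   the catch-all.  So the PentiumPro memcpy entry above says: rep movsl for
   unknown sizes; for known sizes, a simple loop up to 128 bytes, an
   unrolled loop up to 1024, rep movsl up to 8192, then rep movsb.  The
   second table of each pair is the 64-bit variant, DUMMY_STRINGOP_ALGS on
   32-bit-only chips.  */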

struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                     MOVD reg64, xmmreg  Double  FSTORE 4
                                     MOVD reg32, xmmreg  Double  FSTORE 4
                                   On AMDFAM10:
                                     MOVD reg64, xmmreg  Double  FADD 3
                                     MOVD reg32, xmmreg  Double  FADD 3  */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                     MOVD reg64, xmmreg  Double  FSTORE 4
                                     MOVD reg32, xmmreg  Double  FSTORE 4
                                   On AMDFAM10:
                                     MOVD reg64, xmmreg  Double  FADD 3
                                     MOVD reg32, xmmreg  Double  FADD 3  */
  64,                           /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),          /* HI */
   COSTS_N_INSNS (22),          /* SI */
   COSTS_N_INSNS (22),          /* DI */
   COSTS_N_INSNS (22)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  16,                           /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {6, 6, 6},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {6, 6},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {6, 6, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Core 2 and K8.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
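
/* Editorial sketch (illustrative only; the real consumers are the cost
   hooks elsewhere in this file): ix86_cost is repointed at one of the
   tables above according to the selected tuning, and queried roughly as:

     ix86_cost = optimize_size ? &ix86_size_cost : &core2_cost;
     int mult = ix86_cost->mult_init[MODE_INDEX (mode)]
                + nbits * ix86_cost->mult_bit;

   The field names mult_init and mult_bit follow the comments in the
   initializers above ("cost of starting multiply", "cost of multiply per
   each bit set").  */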

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
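
/* Editorial note (a sketch of how these masks compose, following directly
   from the defines above): each m_* macro is a one-bit mask over the
   PROCESSOR_* enum, so a tuning entry below such as

     m_K6_GEODE | m_PENT

   expands to (1<<PROCESSOR_K6) | (1<<PROCESSOR_GEODE) |
   (1<<PROCESSOR_PENTIUM), and the ~(...) entries below mean "every
   processor except these".  */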

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
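
/* Editorial sketch (assuming the option-override logic that consumes the
   table below, elsewhere in this file): ix86_tune_features is derived from
   initial_ix86_tune_features by testing each mask against the bit of the
   CPU selected by -mtune, roughly:

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */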

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results, but after P4 shipped no performance benefit was
     observed from branch hints, and they increase code size.  As a result,
     icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro-based chips and conflicts with the
     partial register dependencies used by Athlon/P4-based chips, it is
     better to leave it off for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500    for DFmode copies.  */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508    conflict here between PPro/Pentium4-based chips that treat 128-bit
1509    SSE registers as single units and K8-based chips that divide SSE
1510    registers into two 64-bit halves.  This knob promotes all store
1511    destinations to 128 bits to allow register renaming on 128-bit SSE
1512    units, but usually costs one extra microop on 64-bit SSE units.
1513    Experimental results show that disabling this option on P4 brings over
1514    a 20% SPECfp regression, while enabling it on K8 brings a roughly 2.4%
1515    regression that can be partly masked by careful scheduling of moves.  */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529    are resolved on SSE register parts instead of whole registers, so we may
1530    maintain just the lower part of scalar values in the proper format,
1531    leaving the upper part undefined.  */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562    than 4 branch instructions in a 16-byte window.  */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions.  */
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1593    and SImode multiply, but the 386 and 486 do HImode multiply faster.  */
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory
1597    takes the vector path on AMD machines.  */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector
1601    path on AMD machines.  */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1605    than a MOV.  */
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609    though it is one byte longer.  */
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1613    operand that cannot be represented using a modRM byte.  The XOR
1614    replacement is long decoded, so this split helps here as well.  */
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618    from FP to FP.  */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635    instructions.  */
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* In case the average insn count for a single function invocation is
1672    lower than this constant, emit fast (but longer) prologue and
1673    epilogue code.  */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1676 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780    register numbers.  Note that these are all stack-top-relative
1781    numbers.
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
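
/* Worked example (editor's note): the maps are indexed by gcc register
   number.  For gcc regno 1 (%edx), svr4_dbx_register_map[1] == 2, matching
   "2 for %edx (gcc regno = 1)" in the table above; dbx_register_map[1] is
   likewise 2.  A lookup is just an array access (the real hook is
   DBX_REGISTER_NUMBER in i386.h):  */
#if 0
int dwarf_regno = svr4_dbx_register_map[1];	/* == 2, i.e. %edx */
#endif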
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829    Stack grows downward:

1835    saved static chain		if ix86_static_chain_on_stack

1837    saved frame pointer		if frame_pointer_needed
1838 					<- HARD_FRAME_POINTER
1843    [saved SSE regs]
1844 					<- sse_regs_save_offset
1847    [va_arg registers]	|
1851    [padding2]		| = to_allocate
   */
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which CPU we are scheduling for.  */
1886 enum attr_cpu ix86_schedule;
1888 /* Which CPU we are optimizing for.  */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* Nonzero if the SSE prefetch instruction is not a NOP.  */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 extern int ix86_force_align_arg_pointer;
1902 static const char ix86_force_align_arg_pointer_string[]
1903 = "force_align_arg_pointer";
1905 static rtx (*ix86_gen_leave) (void);
1906 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1908 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1909 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1910 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1912 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1913 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1914 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1916 /* Preferred alignment for stack boundary in bits. */
1917 unsigned int ix86_preferred_stack_boundary;
1919 /* Alignment for incoming stack boundary in bits specified at
1920    command line.  */
1921 static unsigned int ix86_user_incoming_stack_boundary;
1923 /* Default alignment for incoming stack boundary in bits. */
1924 static unsigned int ix86_default_incoming_stack_boundary;
1926 /* Alignment for incoming stack boundary in bits. */
1927 unsigned int ix86_incoming_stack_boundary;
1929 /* The abi used by target. */
1930 enum calling_abi ix86_abi;
1932 /* Values 1-5: see jump.c */
1933 int ix86_branch_cost;
1935 /* Calling abi specific va_list type nodes. */
1936 static GTY(()) tree sysv_va_list_type_node;
1937 static GTY(()) tree ms_va_list_type_node;
1939 /* Variables which are this size or smaller are put in the data/bss
1940 or ldata/lbss sections. */
1942 int ix86_section_threshold = 65536;
1944 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1945 char internal_label_prefix[16];
1946 int internal_label_prefix_len;
1948 /* Fence to use after loop using movnt. */
1951 /* Register class used for passing a given 64-bit part of the argument.
1952    These represent classes as documented by the psABI, with the exception
1953    of the SSESF and SSEDF classes, which are basically the SSE class: gcc
1954    just uses an SF or DFmode move instead of DImode to avoid reformatting
1956    penalties.  Similarly we play games with INTEGERSI_CLASS to use cheaper
1957    SImode moves whenever possible (the upper half then holds only padding).  */
1958 enum x86_64_reg_class
1961 X86_64_INTEGER_CLASS,
1962 X86_64_INTEGERSI_CLASS,
1969 X86_64_COMPLEX_X87_CLASS,
1973 #define MAX_CLASSES 4
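
/* Worked instance (editor's note, paraphrasing the psABI): an eightbyte
   whose live bytes fit in the low 32 bits - e.g. a struct holding a lone
   "int" - is classified X86_64_INTEGERSI_CLASS and moved in SImode, while
   a full 64-bit integer eightbyte is X86_64_INTEGER_CLASS and moved in
   DImode.  */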
1975 /* Table of constants used by fldpi, fldln2, etc.  */
1976 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1977 static bool ext_80387_constants_init = 0;
1980 static struct machine_function * ix86_init_machine_status (void);
1981 static rtx ix86_function_value (const_tree, const_tree, bool);
1982 static bool ix86_function_value_regno_p (const unsigned int);
1983 static rtx ix86_static_chain (const_tree, bool);
1984 static int ix86_function_regparm (const_tree, const_tree);
1985 static void ix86_compute_frame_layout (struct ix86_frame *);
1986 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1988 static void ix86_add_new_builtins (int);
1989 static rtx ix86_expand_vec_perm_builtin (tree);
1990 static tree ix86_canonical_va_list_type (tree);
1991 static void predict_jump (int);
1992 static unsigned int split_stack_prologue_scratch_regno (void);
1993 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1995 enum ix86_function_specific_strings
1997 IX86_FUNCTION_SPECIFIC_ARCH,
1998 IX86_FUNCTION_SPECIFIC_TUNE,
1999 IX86_FUNCTION_SPECIFIC_FPMATH,
2000 IX86_FUNCTION_SPECIFIC_MAX
2003 static char *ix86_target_string (int, int, const char *, const char *,
2004 const char *, bool);
2005 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2006 static void ix86_function_specific_save (struct cl_target_option *);
2007 static void ix86_function_specific_restore (struct cl_target_option *);
2008 static void ix86_function_specific_print (FILE *, int,
2009 struct cl_target_option *);
2010 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2011 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2012 static bool ix86_can_inline_p (tree, tree);
2013 static void ix86_set_current_function (tree);
2014 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2016 static enum calling_abi ix86_function_abi (const_tree);
2019 #ifndef SUBTARGET32_DEFAULT_CPU
2020 #define SUBTARGET32_DEFAULT_CPU "i386"
2023 /* The svr4 ABI for the i386 says that records and unions are returned
2024    in memory.  */
2025 #ifndef DEFAULT_PCC_STRUCT_RETURN
2026 #define DEFAULT_PCC_STRUCT_RETURN 1
2029 /* Whether -mtune= or -march= were specified */
2030 static int ix86_tune_defaulted;
2031 static int ix86_arch_specified;
2033 /* A mask of ix86_isa_flags that includes bit X if X
2034 was set or cleared on the command line. */
2035 static int ix86_isa_flags_explicit;
2037 /* Define a set of ISAs which are available when a given ISA is
2038 enabled. MMX and SSE ISAs are handled separately. */
2040 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2041 #define OPTION_MASK_ISA_3DNOW_SET \
2042 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2044 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2045 #define OPTION_MASK_ISA_SSE2_SET \
2046 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2047 #define OPTION_MASK_ISA_SSE3_SET \
2048 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2049 #define OPTION_MASK_ISA_SSSE3_SET \
2050 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2051 #define OPTION_MASK_ISA_SSE4_1_SET \
2052 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2053 #define OPTION_MASK_ISA_SSE4_2_SET \
2054 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2055 #define OPTION_MASK_ISA_AVX_SET \
2056 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2057 #define OPTION_MASK_ISA_FMA_SET \
2058 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2060 /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
2061    as -msse4.2.  */
2062 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2064 #define OPTION_MASK_ISA_SSE4A_SET \
2065 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2066 #define OPTION_MASK_ISA_FMA4_SET \
2067 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2068 | OPTION_MASK_ISA_AVX_SET)
2069 #define OPTION_MASK_ISA_XOP_SET \
2070 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2071 #define OPTION_MASK_ISA_LWP_SET \
2074 /* AES and PCLMUL need SSE2 because they use xmm registers */
2075 #define OPTION_MASK_ISA_AES_SET \
2076 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2077 #define OPTION_MASK_ISA_PCLMUL_SET \
2078 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2080 #define OPTION_MASK_ISA_ABM_SET \
2081 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2083 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2084 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2085 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2086 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2087 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2089 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2090 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2091 #define OPTION_MASK_ISA_F16C_SET \
2092 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
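
/* Worked expansion (editor's note): the *_SET macros chain their
   prerequisites, e.g.

     OPTION_MASK_ISA_SSE3_SET
       == OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE

   so a single OR switches on the whole ladder:  */
#if 0
ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;	/* what -msse3 does */
#endif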
2094 /* Define a set of ISAs which aren't available when a given ISA is
2095 disabled. MMX and SSE ISAs are handled separately. */
2097 #define OPTION_MASK_ISA_MMX_UNSET \
2098 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2099 #define OPTION_MASK_ISA_3DNOW_UNSET \
2100 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2101 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2103 #define OPTION_MASK_ISA_SSE_UNSET \
2104 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2105 #define OPTION_MASK_ISA_SSE2_UNSET \
2106 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2107 #define OPTION_MASK_ISA_SSE3_UNSET \
2108 (OPTION_MASK_ISA_SSE3 \
2109 | OPTION_MASK_ISA_SSSE3_UNSET \
2110    | OPTION_MASK_ISA_SSE4A_UNSET)
2111 #define OPTION_MASK_ISA_SSSE3_UNSET \
2112 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2113 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2114 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2115 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2116 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
2117 #define OPTION_MASK_ISA_AVX_UNSET \
2118 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2119 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2120 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2122 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2123    as -mno-sse4.1.  */
2124 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2126 #define OPTION_MASK_ISA_SSE4A_UNSET \
2127 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2129 #define OPTION_MASK_ISA_FMA4_UNSET \
2130 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2131 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2132 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2134 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2135 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2136 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2137 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2138 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2139 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2140 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2141 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2143 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2144 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2145 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
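
/* Worked expansion (editor's note): the *_UNSET macros chain the other
   way, so clearing a base ISA transitively clears everything built on it.
   E.g. OPTION_MASK_ISA_SSE2_UNSET covers SSE2, SSE3, SSSE3, SSE4.1,
   SSE4.2, SSE4A, AVX, FMA, FMA4, XOP and F16C:  */
#if 0
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;	/* what -mno-sse2 does */
#endif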
2147 /* Vectorization library interface and handlers. */
2148 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2151 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2153 /* Processor target table, indexed by processor number */
2156 const struct processor_costs *cost; /* Processor costs */
2157 const int align_loop; /* Default alignments. */
2158 const int align_loop_max_skip;
2159 const int align_jump;
2160 const int align_jump_max_skip;
2161 const int align_func;
2164 static const struct ptt processor_target_table[PROCESSOR_max] =
2166 {&i386_cost, 4, 3, 4, 3, 4},
2167 {&i486_cost, 16, 15, 16, 15, 16},
2168 {&pentium_cost, 16, 7, 16, 7, 16},
2169 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2170 {&geode_cost, 0, 0, 0, 0, 0},
2171 {&k6_cost, 32, 7, 32, 7, 32},
2172 {&athlon_cost, 16, 7, 16, 7, 16},
2173 {&pentium4_cost, 0, 0, 0, 0, 0},
2174 {&k8_cost, 16, 7, 16, 7, 16},
2175 {&nocona_cost, 0, 0, 0, 0, 0},
2176 {&core2_cost, 16, 10, 16, 10, 16},
2177 {&generic32_cost, 16, 7, 16, 7, 16},
2178 {&generic64_cost, 16, 10, 16, 10, 16},
2179 {&amdfam10_cost, 32, 24, 32, 7, 32},
2180 {&bdver1_cost, 32, 24, 32, 7, 32},
2181 {&atom_cost, 16, 7, 16, 7, 16}
2184 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2211 /* Return true if a red-zone is in use. */
2214 ix86_using_red_zone (void)
2216 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2219 /* Implement TARGET_HANDLE_OPTION. */
2222 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2229 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2230 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2234 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2235 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2242 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2243 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2247 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2248 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2258 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2271 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2284 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2297 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2310 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2323 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2336 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2349 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2360 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2361 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2365 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2366 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2372 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2373 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2377 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2378 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2385 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2386 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2390 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2391 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2398 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2399 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2403 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2404 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2411 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2412 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2416 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2417 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2424 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2425 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2429 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2430 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2437 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2438 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2442 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2443 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2450 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2451 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2455 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2456 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2463 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2464 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2468 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2469 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2476 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2477 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2481 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2482 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2489 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2490 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2494 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2495 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2502 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2503 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2507 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2508 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2515 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2516 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2520 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2521 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2528 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2529 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2533 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2534 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2541 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2542 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2546 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2547 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2554 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2555 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2559 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2560 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
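
/* Editor's sketch: every case in the switch above (partially elided here)
   follows the same shape - OR in the *_SET mask when the option is turned
   on, AND out the *_UNSET mask when it is turned off, and in both
   directions record the bits in ix86_isa_flags_explicit so later
   defaulting will not override the user's choice:  */
#if 0
case OPT_mmmx:				/* representative case */
  if (value)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
    }
  else
    {
      ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
    }
  return true;
#endif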
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2573 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2574 const char *fpmath, bool add_nl_p)
2576 struct ix86_target_opts
2578 const char *option; /* option string */
2579 int mask; /* isa mask options */
2582 /* This table is ordered so that options like -msse4.2, which imply
2583    preceding options, are matched first.  */
2584 static struct ix86_target_opts isa_opts[] =
2586 { "-m64", OPTION_MASK_ISA_64BIT },
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2592 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2593 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2594 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2595 { "-msse3", OPTION_MASK_ISA_SSE3 },
2596 { "-msse2", OPTION_MASK_ISA_SSE2 },
2597 { "-msse", OPTION_MASK_ISA_SSE },
2598 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2599 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2600 { "-mmmx", OPTION_MASK_ISA_MMX },
2601 { "-mabm", OPTION_MASK_ISA_ABM },
2602 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2603 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2604 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2605 { "-maes", OPTION_MASK_ISA_AES },
2606 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2607 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2608 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2609 { "-mf16c", OPTION_MASK_ISA_F16C },
2613 static struct ix86_target_opts flag_opts[] =
2615 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2616 { "-m80387", MASK_80387 },
2617 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2618 { "-malign-double", MASK_ALIGN_DOUBLE },
2619 { "-mcld", MASK_CLD },
2620 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2621 { "-mieee-fp", MASK_IEEE_FP },
2622 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2623 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2624 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2625 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2626 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2627 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2628 { "-mno-red-zone", MASK_NO_RED_ZONE },
2629 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2630 { "-mrecip", MASK_RECIP },
2631 { "-mrtd", MASK_RTD },
2632 { "-msseregparm", MASK_SSEREGPARM },
2633 { "-mstack-arg-probe", MASK_STACK_PROBE },
2634 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2641 char target_other[40];
2650 memset (opts, '\0', sizeof (opts));
2652 /* Add -march= option. */
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2659 /* Add -mtune= option. */
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2666 /* Pick out the ISA options.  */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2669 if ((isa & isa_opts[i].mask) != 0)
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2676 if (isa && add_nl_p)
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2685 if ((flags & flag_opts[i].mask) != 0)
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2692 if (flags && add_nl_p)
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2698 /* Add -fpmath= option. */
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2709 gcc_assert (num < ARRAY_SIZE (opts));
2711 /* Size the string. */
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2717 for (j = 0; j < 2; j++)
2719 len += strlen (opts[i][j]);
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2726 for (i = 0; i < num; i++)
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2746 for (j = 0; j < 2; j++)
2749 memcpy (ptr, opts[i][j], len2[j]);
2751 line_len += len2[j];
2756 gcc_assert (ret + len >= ptr);
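
/* Usage sketch (editor's note, hypothetical argument values):  */
#if 0
char *s = ix86_target_string (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE,
			      MASK_80387, "core2", "generic", "sse", false);
/* s is roughly "-march=core2 -mtune=generic -msse2 -msse -m80387
   -mfpmath=sse"; the caller must free it.  */
free (s);
#endif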
2761 /* Return TRUE if software prefetching is beneficial for the
2762    target.  */

2764 static bool
2765 software_prefetching_beneficial_p (void)
2769 case PROCESSOR_GEODE:
2771 case PROCESSOR_ATHLON:
2773 case PROCESSOR_AMDFAM10:
2781 /* Return true if profiling code should be emitted before the
2782    prologue, false otherwise.
2783    Note: for x86 the "hotfix" case is rejected with sorry ().  */
2785 ix86_profile_before_prologue (void)
2787 return flag_fentry != 0;
2790 /* Function that is callable from the debugger to print the current
2791    options.  */
2792 void
2793 ix86_debug_options (void)
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2801 fprintf (stderr, "%s\n\n", opts);
2805 fputs ("<no options>\n\n", stderr);
2810 /* Override various settings based on options.  If MAIN_ARGS_P, the
2811    options are from the command line, otherwise they are from
2812    attribute(target).  */

2814 static void
2815 ix86_option_override_internal (bool main_args_p)
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2833 PTA_PREFETCH_SSE = 1 << 4,
2835 PTA_3DNOW_A = 1 << 6,
2839 PTA_POPCNT = 1 << 10,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2846 PTA_PCLMUL = 1 << 17,
2849 PTA_MOVBE = 1 << 20,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2865 const processor_alias_table[] =
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
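
/* Worked instance (editor's note): with -march=core2 the scan further
   below finds the "core2" row, so ix86_arch becomes PROCESSOR_CORE2,
   ix86_schedule becomes CPU_CORE2, and the PTA_* bits turn on
   MMX/SSE/SSE2/SSE3/SSSE3/CX16 - each only if the user did not already
   set or clear that ISA explicitly.  */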
2957 /* Set up prefix/suffix so the error messages refer to either the command
2958    line argument or the attribute(target).  */
2967 prefix = "option(\"";
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990    as -mtune=generic.  With native compilers we won't see
2991    -mtune=native, as it was changed by the driver.  */
2992 || !strcmp (ix86_tune_string, "native"))
2995 ix86_tune_string = "generic64";
2997 ix86_tune_string = "generic32";
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3030 ix86_tune_string = "generic64";
3032 ix86_tune_string = "generic32";
3036 if (ix86_stringop_string)
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3062 ix86_arch_specified = 1;
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3076 ix86_abi = DEFAULT_ABI;
3078 if (ix86_cmodel_string != 0)
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3107 ix86_cmodel = CM_32;
3109 if (ix86_asm_string != 0)
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
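
/* Worked instance (editor's note): for -march=k6, ix86_arch_mask is
   1u << PROCESSOR_K6, and X86_ARCH_CMOVE above is ~(m_386 | m_486 |
   m_PENT | m_K6), so ix86_arch_features[X86_ARCH_CMOVE] ends up 0 and
   cmov is never generated for that target.  */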
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3238 if (ix86_tune_defaulted)
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3249 error ("CPU you selected does not support x86-64 "
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs; so, we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
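
/* Worked instance (editor's note): for -mtune=atom, ix86_tune_mask is
   1u << PROCESSOR_ATOM, and X86_TUNE_USE_INCDEC above is ~(m_PENT4 |
   m_NOCONA | m_GENERIC | m_ATOM), so
   ix86_tune_features[X86_TUNE_USE_INCDEC] becomes 0 and add/sub $1 is
   preferred over inc/dec.  */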
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3275 /* Set the default values for switches whose default depends on TARGET_64BIT
3276 in case they weren't overwritten by command line options. */
3281 if (flag_omit_frame_pointer == 2)
3282 flag_omit_frame_pointer = 1;
3283 if (flag_asynchronous_unwind_tables == 2)
3284 flag_asynchronous_unwind_tables = 1;
3285 if (flag_pcc_struct_return == 2)
3286 flag_pcc_struct_return = 0;
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3294 if (flag_asynchronous_unwind_tables == 2)
3295 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3296 if (flag_pcc_struct_return == 2)
3297 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3301 ix86_cost = &ix86_size_cost;
3303 ix86_cost = processor_target_table[ix86_tune].cost;
3305 /* Arrange to set up i386_stack_locals for all functions. */
3306 init_machine_status = ix86_init_machine_status;
3308 /* Validate -mregparm= value. */
3309 if (ix86_regparm_string)
3312 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3313 i = atoi (ix86_regparm_string);
3314 if (i < 0 || i > REGPARM_MAX)
3315 error ("%sregparm=%d%s is not between 0 and %d",
3316 prefix, i, suffix, REGPARM_MAX);
3321 ix86_regparm = REGPARM_MAX;
3323 /* If the user has provided any of the -malign-* options,
3324 warn and use that value only if -falign-* is not set.
3325 Remove this code in GCC 3.2 or later. */
3326 if (ix86_align_loops_string)
3328 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3329 prefix, suffix, suffix);
3330 if (align_loops == 0)
3332 i = atoi (ix86_align_loops_string);
3333 if (i < 0 || i > MAX_CODE_ALIGN)
3334 error ("%salign-loops=%d%s is not between 0 and %d",
3335 prefix, i, suffix, MAX_CODE_ALIGN);
3337 align_loops = 1 << i;
3341 if (ix86_align_jumps_string)
3343 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3344 prefix, suffix, suffix);
3345 if (align_jumps == 0)
3347 i = atoi (ix86_align_jumps_string);
3348 if (i < 0 || i > MAX_CODE_ALIGN)
3349 error ("%salign-loops=%d%s is not between 0 and %d",
3350 prefix, i, suffix, MAX_CODE_ALIGN);
3352 align_jumps = 1 << i;
3356 if (ix86_align_funcs_string)
3358 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3359 prefix, suffix, suffix);
3360 if (align_functions == 0)
3362 i = atoi (ix86_align_funcs_string);
3363 if (i < 0 || i > MAX_CODE_ALIGN)
3364 error ("%salign-loops=%d%s is not between 0 and %d",
3365 prefix, i, suffix, MAX_CODE_ALIGN);
3367 align_functions = 1 << i;
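
/* Worked example (editor's note): the -malign-* options take a log2
   value, so -malign-functions=4 stores align_functions = 1 << 4 == 16,
   the same 16-byte alignment that -falign-functions=16 expresses
   directly, which is why the -m forms are deprecated above.  */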
3371 /* Default align_* from the processor table. */
3372 if (align_loops == 0)
3374 align_loops = processor_target_table[ix86_tune].align_loop;
3375 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3377 if (align_jumps == 0)
3379 align_jumps = processor_target_table[ix86_tune].align_jump;
3380 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3382 if (align_functions == 0)
3384 align_functions = processor_target_table[ix86_tune].align_func;
3387 /* Validate -mbranch-cost= value, or provide default. */
3388 ix86_branch_cost = ix86_cost->branch_cost;
3389 if (ix86_branch_cost_string)
3391 i = atoi (ix86_branch_cost_string);
3393 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3395 ix86_branch_cost = i;
3397 if (ix86_section_threshold_string)
3399 i = atoi (ix86_section_threshold_string);
3401 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3403 ix86_section_threshold = i;
3406 if (ix86_tls_dialect_string)
3408 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU;
3410 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU2;
3413 error ("bad value (%s) for %stls-dialect=%s %s",
3414 ix86_tls_dialect_string, prefix, suffix, sw);
3417 if (ix87_precision_string)
3419 i = atoi (ix87_precision_string);
3420 if (i != 32 && i != 64 && i != 80)
3421 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3426 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3428 /* Enable by default the SSE and MMX builtins. Do allow the user to
3429 explicitly disable any of these. In particular, disabling SSE and
3430 MMX for kernel code is extremely useful. */
3431 if (!ix86_arch_specified)
3433 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3434 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3437 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3441 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3443 if (!ix86_arch_specified)
3445 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3447 /* The i386 ABI does not specify a red zone. It still makes sense to use
3448 one when the programmer takes care to keep the stack from being destroyed. */
3449 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3450 target_flags |= MASK_NO_RED_ZONE;
3453 /* Keep nonleaf frame pointers. */
3454 if (flag_omit_frame_pointer)
3455 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3456 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3457 flag_omit_frame_pointer = 1;
3459 /* If we're doing fast math, we don't care about comparison order
3460 wrt NaNs. This lets us use a shorter comparison sequence. */
3461 if (flag_finite_math_only)
3462 target_flags &= ~MASK_IEEE_FP;
3464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3465 since the insns won't need emulation. */
3466 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3467 target_flags &= ~MASK_NO_FANCY_MATH_387;
3469 /* Likewise, if the target doesn't have a 387, or we've specified
3470 software floating point, don't use 387 inline intrinsics. */
3472 target_flags |= MASK_NO_FANCY_MATH_387;
3474 /* Turn on MMX builtins for -msse. */
3477 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3478 x86_prefetch_sse = true;
3481 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3482 if (TARGET_SSE4_2 || TARGET_ABM)
3483 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3485 /* Validate -mpreferred-stack-boundary= value or default it to
3486 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3487 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3488 if (ix86_preferred_stack_boundary_string)
3490 i = atoi (ix86_preferred_stack_boundary_string);
3491 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3492 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3493 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3495 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
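/* Worked example (editorial): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   alignment that SSE spill slots and the SysV x86-64 ABI expect.  */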
3498 /* Set the default value for -mstackrealign. */
3499 if (ix86_force_align_arg_pointer == -1)
3500 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3502 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3504 /* Validate -mincoming-stack-boundary= value or default it to
3505 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3506 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3507 if (ix86_incoming_stack_boundary_string)
3509 i = atoi (ix86_incoming_stack_boundary_string);
3510 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3511 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3512 i, TARGET_64BIT ? 4 : 2);
3515 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3516 ix86_incoming_stack_boundary
3517 = ix86_user_incoming_stack_boundary;
3521 /* Accept -msseregparm only if at least SSE support is enabled. */
3522 if (TARGET_SSEREGPARM
3524 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3526 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3527 if (ix86_fpmath_string != 0)
3529 if (! strcmp (ix86_fpmath_string, "387"))
3530 ix86_fpmath = FPMATH_387;
3531 else if (! strcmp (ix86_fpmath_string, "sse"))
3535 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3536 ix86_fpmath = FPMATH_387;
3539 ix86_fpmath = FPMATH_SSE;
3541 else if (! strcmp (ix86_fpmath_string, "387,sse")
3542 || ! strcmp (ix86_fpmath_string, "387+sse")
3543 || ! strcmp (ix86_fpmath_string, "sse,387")
3544 || ! strcmp (ix86_fpmath_string, "sse+387")
3545 || ! strcmp (ix86_fpmath_string, "both"))
3549 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3550 ix86_fpmath = FPMATH_387;
3552 else if (!TARGET_80387)
3554 warning (0, "387 instruction set disabled, using SSE arithmetic");
3555 ix86_fpmath = FPMATH_SSE;
3558 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3561 error ("bad value (%s) for %sfpmath=%s %s",
3562 ix86_fpmath_string, prefix, suffix, sw);
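/* Editorial example of the accepted spellings: -mfpmath=387 and -mfpmath=sse
   each select a single unit, while "387,sse", "387+sse", "sse,387",
   "sse+387" and "both" request FPMATH_SSE | FPMATH_387, provided the
   corresponding instruction sets are actually enabled.  */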
3565 /* If the i387 is disabled, then do not return values in it. */
3567 target_flags &= ~MASK_FLOAT_RETURNS;
3569 /* Use external vectorized library in vectorizing intrinsics. */
3570 if (ix86_veclibabi_string)
3572 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_svml;
3574 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_acml;
3577 error ("unknown vectorization library ABI type (%s) for "
3578 "%sveclibabi=%s %s", ix86_veclibabi_string,
3579 prefix, suffix, sw);
3582 if ((!USE_IX86_FRAME_POINTER
3583 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3584 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3588 /* ??? Unwind info is not correct around the CFG unless either a frame
3589 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3590 unwind info generation to be aware of the CFG and propagating states around edges. */
3592 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3593 || flag_exceptions || flag_non_call_exceptions)
3594 && flag_omit_frame_pointer
3595 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3597 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3598 warning (0, "unwind tables currently require either a frame pointer "
3599 "or %saccumulate-outgoing-args%s for correctness",
3601 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3604 /* If stack probes are required, the space used for large function
3605 arguments on the stack must also be probed, so enable
3606 -maccumulate-outgoing-args so this happens in the prologue. */
3607 if (TARGET_STACK_PROBE
3608 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3610 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3611 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3612 "for correctness", prefix, suffix);
3613 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3616 /* For sane SSE instruction set generation we need the fcomi instruction.
3617 It is safe to enable all CMOVE instructions. */
3621 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3624 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3625 p = strchr (internal_label_prefix, 'X');
3626 internal_label_prefix_len = p - internal_label_prefix;
3630 /* When the scheduling description is not available, disable the scheduler
3631 pass so it won't slow down compilation and make x87 code slower. */
3632 if (!TARGET_SCHEDULE)
3633 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3635 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3636 set_param_value ("simultaneous-prefetches",
3637 ix86_cost->simultaneous_prefetches);
3638 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3639 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3640 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3641 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3642 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3643 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3645 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
3646 if (flag_prefetch_loop_arrays < 0
3649 && software_prefetching_beneficial_p ())
3650 flag_prefetch_loop_arrays = 1;
3652 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3653 can be optimized to ap = __builtin_next_arg (0). */
3654 if (!TARGET_64BIT && !flag_split_stack)
3655 targetm.expand_builtin_va_start = NULL;
3659 ix86_gen_leave = gen_leave_rex64;
3660 ix86_gen_add3 = gen_adddi3;
3661 ix86_gen_sub3 = gen_subdi3;
3662 ix86_gen_sub3_carry = gen_subdi3_carry;
3663 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3664 ix86_gen_monitor = gen_sse3_monitor64;
3665 ix86_gen_andsp = gen_anddi3;
3666 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3667 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3668 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3672 ix86_gen_leave = gen_leave;
3673 ix86_gen_add3 = gen_addsi3;
3674 ix86_gen_sub3 = gen_subsi3;
3675 ix86_gen_sub3_carry = gen_subsi3_carry;
3676 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3677 ix86_gen_monitor = gen_sse3_monitor;
3678 ix86_gen_andsp = gen_andsi3;
3679 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3680 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3681 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3685 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3687 target_flags |= MASK_CLD & ~target_flags_explicit;
3690 if (!TARGET_64BIT && flag_pic)
3692 if (flag_fentry > 0)
3693 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3696 if (flag_fentry < 0)
3698 #if defined(PROFILE_BEFORE_PROLOGUE)
3705 /* Save the initial options in case the user uses function-specific options. */
3707 target_option_default_node = target_option_current_node
3708 = build_target_option_node ();
3711 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3714 ix86_option_override (void)
3716 ix86_option_override_internal (true);
3719 /* Update register usage after having seen the compiler flags. */
3722 ix86_conditional_register_usage (void)
3727 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3729 if (fixed_regs[i] > 1)
3730 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3731 if (call_used_regs[i] > 1)
3732 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3735 /* The PIC register, if it exists, is fixed. */
3736 j = PIC_OFFSET_TABLE_REGNUM;
3737 if (j != INVALID_REGNUM)
3738 fixed_regs[j] = call_used_regs[j] = 1;
3740 /* The MS_ABI changes the set of call-used registers. */
3741 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3743 call_used_regs[SI_REG] = 0;
3744 call_used_regs[DI_REG] = 0;
3745 call_used_regs[XMM6_REG] = 0;
3746 call_used_regs[XMM7_REG] = 0;
3747 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3748 call_used_regs[i] = 0;
3751 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3752 other call-clobbered regs for 64-bit. */
3755 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3757 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3758 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3759 && call_used_regs[i])
3760 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3763 /* If MMX is disabled, squash the registers. */
3765 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3766 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3767 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3769 /* If SSE is disabled, squash the registers. */
3771 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3772 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3773 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3775 /* If the FPU is disabled, squash the registers. */
3776 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3777 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3778 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3779 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3781 /* If 32-bit, squash the 64-bit registers. */
3784 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3786 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3792 /* Save the current options */
3795 ix86_function_specific_save (struct cl_target_option *ptr)
3797 ptr->arch = ix86_arch;
3798 ptr->schedule = ix86_schedule;
3799 ptr->tune = ix86_tune;
3800 ptr->fpmath = ix86_fpmath;
3801 ptr->branch_cost = ix86_branch_cost;
3802 ptr->tune_defaulted = ix86_tune_defaulted;
3803 ptr->arch_specified = ix86_arch_specified;
3804 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3805 ptr->target_flags_explicit = target_flags_explicit;
3807 /* The fields are char but the variables are not; make sure the
3808 values fit in the fields. */
3809 gcc_assert (ptr->arch == ix86_arch);
3810 gcc_assert (ptr->schedule == ix86_schedule);
3811 gcc_assert (ptr->tune == ix86_tune);
3812 gcc_assert (ptr->fpmath == ix86_fpmath);
3813 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3816 /* Restore the current options */
3819 ix86_function_specific_restore (struct cl_target_option *ptr)
3821 enum processor_type old_tune = ix86_tune;
3822 enum processor_type old_arch = ix86_arch;
3823 unsigned int ix86_arch_mask, ix86_tune_mask;
3826 ix86_arch = (enum processor_type) ptr->arch;
3827 ix86_schedule = (enum attr_cpu) ptr->schedule;
3828 ix86_tune = (enum processor_type) ptr->tune;
3829 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3830 ix86_branch_cost = ptr->branch_cost;
3831 ix86_tune_defaulted = ptr->tune_defaulted;
3832 ix86_arch_specified = ptr->arch_specified;
3833 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3834 target_flags_explicit = ptr->target_flags_explicit;
3836 /* Recreate the arch feature tests if the arch changed */
3837 if (old_arch != ix86_arch)
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i]
3842 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3845 /* Recreate the tune optimization tests */
3846 if (old_tune != ix86_tune)
3848 ix86_tune_mask = 1u << ix86_tune;
3849 for (i = 0; i < X86_TUNE_LAST; ++i)
3850 ix86_tune_features[i]
3851 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3855 /* Print the current options */
3858 ix86_function_specific_print (FILE *file, int indent,
3859 struct cl_target_option *ptr)
3862 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3863 NULL, NULL, NULL, false);
3865 fprintf (file, "%*sarch = %d (%s)\n",
3868 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3869 ? cpu_names[ptr->arch]
3872 fprintf (file, "%*stune = %d (%s)\n",
3875 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3876 ? cpu_names[ptr->tune]
3879 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3880 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3881 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3882 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3886 fprintf (file, "%*s%s\n", indent, "", target_string);
3887 free (target_string);
3892 /* Inner function to process the attribute((target(...))), take an argument and
3893 set the current options from the argument. If we have a list, recursively go over the list. */
3897 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3902 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3903 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3904 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3905 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3920 enum ix86_opt_type type;
3925 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3926 IX86_ATTR_ISA ("abm", OPT_mabm),
3927 IX86_ATTR_ISA ("aes", OPT_maes),
3928 IX86_ATTR_ISA ("avx", OPT_mavx),
3929 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3930 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3931 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3932 IX86_ATTR_ISA ("sse", OPT_msse),
3933 IX86_ATTR_ISA ("sse2", OPT_msse2),
3934 IX86_ATTR_ISA ("sse3", OPT_msse3),
3935 IX86_ATTR_ISA ("sse4", OPT_msse4),
3936 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3937 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3938 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3939 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3940 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3941 IX86_ATTR_ISA ("xop", OPT_mxop),
3942 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3943 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3944 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3945 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3947 /* string options */
3948 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3949 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3950 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3953 IX86_ATTR_YES ("cld",
3957 IX86_ATTR_NO ("fancy-math-387",
3958 OPT_mfancy_math_387,
3959 MASK_NO_FANCY_MATH_387),
3961 IX86_ATTR_YES ("ieee-fp",
3965 IX86_ATTR_YES ("inline-all-stringops",
3966 OPT_minline_all_stringops,
3967 MASK_INLINE_ALL_STRINGOPS),
3969 IX86_ATTR_YES ("inline-stringops-dynamically",
3970 OPT_minline_stringops_dynamically,
3971 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3973 IX86_ATTR_NO ("align-stringops",
3974 OPT_mno_align_stringops,
3975 MASK_NO_ALIGN_STRINGOPS),
3977 IX86_ATTR_YES ("recip",
3983 /* If this is a list, recurse to get the options. */
3984 if (TREE_CODE (args) == TREE_LIST)
3988 for (; args; args = TREE_CHAIN (args))
3989 if (TREE_VALUE (args)
3990 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3996 else if (TREE_CODE (args) != STRING_CST)
3999 /* Handle multiple arguments separated by commas. */
4000 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4002 while (next_optstr && *next_optstr != '\0')
4004 char *p = next_optstr;
4006 char *comma = strchr (next_optstr, ',');
4007 const char *opt_string;
4008 size_t len, opt_len;
4013 enum ix86_opt_type type = ix86_opt_unknown;
4019 len = comma - next_optstr;
4020 next_optstr = comma + 1;
4028 /* Recognize no-xxx. */
4029 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4038 /* Find the option. */
4041 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4043 type = attrs[i].type;
4044 opt_len = attrs[i].len;
4045 if (ch == attrs[i].string[0]
4046 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4047 && memcmp (p, attrs[i].string, opt_len) == 0)
4050 mask = attrs[i].mask;
4051 opt_string = attrs[i].string;
4056 /* Process the option. */
4059 error ("attribute(target(\"%s\")) is unknown", orig_p);
4063 else if (type == ix86_opt_isa)
4064 ix86_handle_option (opt, p, opt_set_p);
4066 else if (type == ix86_opt_yes || type == ix86_opt_no)
4068 if (type == ix86_opt_no)
4069 opt_set_p = !opt_set_p;
4072 target_flags |= mask;
4074 target_flags &= ~mask;
4077 else if (type == ix86_opt_str)
4081 error ("option(\"%s\") was already specified", opt_string);
4085 p_strings[opt] = xstrdup (p + opt_len);
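/* Editorial example of the parsing above: for
     __attribute__((target ("sse4.1,no-abm,arch=core2")))
   the string is split at the commas; "sse4.1" matches an IX86_ATTR_ISA
   entry and is enabled, the "no-" prefix inverts opt_set_p so "abm" is
   disabled, and "arch=" is an ix86_opt_str entry whose value "core2"
   is saved in p_strings for later processing.  */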
4095 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4098 ix86_valid_target_attribute_tree (tree args)
4100 const char *orig_arch_string = ix86_arch_string;
4101 const char *orig_tune_string = ix86_tune_string;
4102 const char *orig_fpmath_string = ix86_fpmath_string;
4103 int orig_tune_defaulted = ix86_tune_defaulted;
4104 int orig_arch_specified = ix86_arch_specified;
4105 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4108 struct cl_target_option *def
4109 = TREE_TARGET_OPTION (target_option_default_node);
4111 /* Process each of the options on the chain. */
4112 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4115 /* If the changed options are different from the default, rerun
4116 ix86_option_override_internal, and then save the options away.
4117 The string options are attribute options, and will be undone
4118 when we copy the save structure. */
4119 if (ix86_isa_flags != def->ix86_isa_flags
4120 || target_flags != def->target_flags
4121 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4122 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4123 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4125 /* If we are using the default tune= or arch=, undo the string assigned,
4126 and use the default. */
4127 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4128 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4129 else if (!orig_arch_specified)
4130 ix86_arch_string = NULL;
4132 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4133 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4134 else if (orig_tune_defaulted)
4135 ix86_tune_string = NULL;
4137 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4138 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4139 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4140 else if (!TARGET_64BIT && TARGET_SSE)
4141 ix86_fpmath_string = "sse,387";
4143 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4144 ix86_option_override_internal (false);
4146 /* Add any builtin functions with the new isa if any. */
4147 ix86_add_new_builtins (ix86_isa_flags);
4149 /* Save the current options unless we are validating options for #pragma. */
4151 t = build_target_option_node ();
4153 ix86_arch_string = orig_arch_string;
4154 ix86_tune_string = orig_tune_string;
4155 ix86_fpmath_string = orig_fpmath_string;
4157 /* Free up memory allocated to hold the strings */
4158 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4159 if (option_strings[i])
4160 free (option_strings[i]);
4166 /* Hook to validate attribute((target("string"))). */
4169 ix86_valid_target_attribute_p (tree fndecl,
4170 tree ARG_UNUSED (name),
4172 int ARG_UNUSED (flags))
4174 struct cl_target_option cur_target;
4176 tree old_optimize = build_optimization_node ();
4177 tree new_target, new_optimize;
4178 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4180 /* If the function changed the optimization levels as well as setting target
4181 options, start with the optimizations specified. */
4182 if (func_optimize && func_optimize != old_optimize)
4183 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4185 /* The target attributes may also change some optimization flags, so update
4186 the optimization options if necessary. */
4187 cl_target_option_save (&cur_target);
4188 new_target = ix86_valid_target_attribute_tree (args);
4189 new_optimize = build_optimization_node ();
4196 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4198 if (old_optimize != new_optimize)
4199 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4202 cl_target_option_restore (&cur_target);
4204 if (old_optimize != new_optimize)
4205 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4211 /* Hook to determine if one function can safely inline another. */
4214 ix86_can_inline_p (tree caller, tree callee)
4217 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4218 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4220 /* If callee has no option attributes, then it is ok to inline. */
4224 /* If caller has no option attributes, but callee does, then it is not ok to inline. */
4226 else if (!caller_tree)
4231 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4232 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4234 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4235 function can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4237 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4238 != callee_opts->ix86_isa_flags)
4241 /* See if we have the same non-isa options. */
4242 else if (caller_opts->target_flags != callee_opts->target_flags)
4245 /* See if arch, tune, etc. are the same. */
4246 else if (caller_opts->arch != callee_opts->arch)
4249 else if (caller_opts->tune != callee_opts->tune)
4252 else if (caller_opts->fpmath != callee_opts->fpmath)
4255 else if (caller_opts->branch_cost != callee_opts->branch_cost)
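/* Editorial example: given
     __attribute__((target ("sse2"))) int f (void);
     __attribute__((target ("sse4.2"))) int g (void);
   g may inline f, since f's ISA flags are a subset of g's, but f may
   not inline g.  */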
4266 /* Remember the last target of ix86_set_current_function. */
4267 static GTY(()) tree ix86_previous_fndecl;
4269 /* Establish appropriate back-end context for processing the function
4270 FNDECL. The argument might be NULL to indicate processing at top
4271 level, outside of any function scope. */
4273 ix86_set_current_function (tree fndecl)
4275 /* Only change the context if the function changes. This hook is called
4276 several times in the course of compiling a function, and we don't want to
4277 slow things down too much or call target_reinit when it isn't safe. */
4278 if (fndecl && fndecl != ix86_previous_fndecl)
4280 tree old_tree = (ix86_previous_fndecl
4281 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4284 tree new_tree = (fndecl
4285 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4288 ix86_previous_fndecl = fndecl;
4289 if (old_tree == new_tree)
4294 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4300 struct cl_target_option *def
4301 = TREE_TARGET_OPTION (target_option_current_node);
4303 cl_target_option_restore (def);
4310 /* Return true if this goes in large data/bss. */
4313 ix86_in_large_data_p (tree exp)
4315 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4318 /* Functions are never large data. */
4319 if (TREE_CODE (exp) == FUNCTION_DECL)
4322 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4324 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4325 if (strcmp (section, ".ldata") == 0
4326 || strcmp (section, ".lbss") == 0)
4332 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4334 /* If this is an incomplete type with size 0, then we can't put it
4335 in data because it might be too big when completed. */
4336 if (!size || size > ix86_section_threshold)
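/* Editorial example: with -mcmodel=medium -mlarge-data-threshold=65536,
   a definition such as
     static char buf[100000];
   exceeds the threshold and counts as large data, so it is placed in
   .lbss rather than .bss by the section selection code below.  */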
4343 /* Switch to the appropriate section for output of DECL.
4344 DECL is either a `VAR_DECL' node or a constant of some sort.
4345 RELOC indicates whether forming the initial value of DECL requires
4346 link-time relocations. */
4348 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4352 x86_64_elf_select_section (tree decl, int reloc,
4353 unsigned HOST_WIDE_INT align)
4355 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4356 && ix86_in_large_data_p (decl))
4358 const char *sname = NULL;
4359 unsigned int flags = SECTION_WRITE;
4360 switch (categorize_decl_for_section (decl, reloc))
4365 case SECCAT_DATA_REL:
4366 sname = ".ldata.rel";
4368 case SECCAT_DATA_REL_LOCAL:
4369 sname = ".ldata.rel.local";
4371 case SECCAT_DATA_REL_RO:
4372 sname = ".ldata.rel.ro";
4374 case SECCAT_DATA_REL_RO_LOCAL:
4375 sname = ".ldata.rel.ro.local";
4379 flags |= SECTION_BSS;
4382 case SECCAT_RODATA_MERGE_STR:
4383 case SECCAT_RODATA_MERGE_STR_INIT:
4384 case SECCAT_RODATA_MERGE_CONST:
4388 case SECCAT_SRODATA:
4395 /* We don't split these for the medium model. Place them into
4396 default sections and hope for the best. */
4401 /* We might get called with string constants, but get_named_section
4402 doesn't like them as they are not DECLs. Also, we need to set
4403 flags in that case. */
4405 return get_section (sname, flags, NULL);
4406 return get_named_section (decl, sname, reloc);
4409 return default_elf_select_section (decl, reloc, align);
4412 /* Build up a unique section name, expressed as a
4413 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4414 RELOC indicates whether the initial value of EXP requires
4415 link-time relocations. */
4417 static void ATTRIBUTE_UNUSED
4418 x86_64_elf_unique_section (tree decl, int reloc)
4420 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4421 && ix86_in_large_data_p (decl))
4423 const char *prefix = NULL;
4424 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4425 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4427 switch (categorize_decl_for_section (decl, reloc))
4430 case SECCAT_DATA_REL:
4431 case SECCAT_DATA_REL_LOCAL:
4432 case SECCAT_DATA_REL_RO:
4433 case SECCAT_DATA_REL_RO_LOCAL:
4434 prefix = one_only ? ".ld" : ".ldata";
4437 prefix = one_only ? ".lb" : ".lbss";
4440 case SECCAT_RODATA_MERGE_STR:
4441 case SECCAT_RODATA_MERGE_STR_INIT:
4442 case SECCAT_RODATA_MERGE_CONST:
4443 prefix = one_only ? ".lr" : ".lrodata";
4445 case SECCAT_SRODATA:
4452 /* We don't split these for the medium model. Place them into
4453 default sections and hope for the best. */
4458 const char *name, *linkonce;
4461 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4462 name = targetm.strip_name_encoding (name);
4464 /* If we're using one_only, then there needs to be a .gnu.linkonce
4465 prefix to the section name. */
4466 linkonce = one_only ? ".gnu.linkonce" : "";
4468 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4470 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4474 default_unique_section (decl, reloc);
4477 #ifdef COMMON_ASM_OP
4478 /* This says how to output assembler code to declare an
4479 uninitialized external linkage data object.
4481 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
4484 x86_elf_aligned_common (FILE *file,
4485 const char *name, unsigned HOST_WIDE_INT size,
4488 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4489 && size > (unsigned int)ix86_section_threshold)
4490 fputs (".largecomm\t", file);
4492 fputs (COMMON_ASM_OP, file);
4493 assemble_name (file, name);
4494 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4495 size, align / BITS_PER_UNIT);
4499 /* Utility function for targets to use in implementing
4500 ASM_OUTPUT_ALIGNED_BSS. */
4503 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4504 const char *name, unsigned HOST_WIDE_INT size,
4507 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4508 && size > (unsigned int)ix86_section_threshold)
4509 switch_to_section (get_named_section (decl, ".lbss", 0));
4511 switch_to_section (bss_section);
4512 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4513 #ifdef ASM_DECLARE_OBJECT_NAME
4514 last_assemble_variable_decl = decl;
4515 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4517 /* Standard thing is just output label for the object. */
4518 ASM_OUTPUT_LABEL (file, name);
4519 #endif /* ASM_DECLARE_OBJECT_NAME */
4520 ASM_OUTPUT_SKIP (file, size ? size : 1);
4524 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4526 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4527 make the problem with not enough registers even worse. */
4528 #ifdef INSN_SCHEDULING
4530 flag_schedule_insns = 0;
4534 /* The Darwin libraries never set errno, so we might as well
4535 avoid calling them when that's the only reason we would. */
4536 flag_errno_math = 0;
4538 /* The default values of these switches depend on the TARGET_64BIT
4539 that is not known at this moment. Mark these values with 2 and
4540 let the user override them. In case there is no command line
4541 option specifying them, we will set the defaults in
4542 ix86_option_override_internal. */
4544 flag_omit_frame_pointer = 2;
4546 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4550 flag_pcc_struct_return = 2;
4551 flag_asynchronous_unwind_tables = 2;
4552 flag_vect_cost_model = 1;
4553 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4554 SUBTARGET_OPTIMIZATION_OPTIONS;
4558 /* Decide whether we must probe the stack before any space allocation
4559 on this target. It's essentially TARGET_STACK_PROBE except when
4560 -fstack-check causes the stack to be already probed differently. */
4563 ix86_target_stack_probe (void)
4565 /* Do not probe the stack twice if static stack checking is enabled. */
4566 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4569 return TARGET_STACK_PROBE;
4572 /* Decide whether we can make a sibling call to a function. DECL is the
4573 declaration of the function being targeted by the call and EXP is the
4574 CALL_EXPR representing the call. */
4577 ix86_function_ok_for_sibcall (tree decl, tree exp)
4579 tree type, decl_or_type;
4582 /* If we are generating position-independent code, we cannot sibcall
4583 optimize any indirect call, or a direct call to a global function,
4584 as the PLT requires %ebx be live. */
4585 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4588 /* If we need to align the outgoing stack, then sibcalling would
4589 unalign the stack, which may break the called function. */
4590 if (ix86_minimum_incoming_stack_boundary (true)
4591 < PREFERRED_STACK_BOUNDARY)
4596 decl_or_type = decl;
4597 type = TREE_TYPE (decl);
4601 /* We're looking at the CALL_EXPR, we need the type of the function. */
4602 type = CALL_EXPR_FN (exp); /* pointer expression */
4603 type = TREE_TYPE (type); /* pointer type */
4604 type = TREE_TYPE (type); /* function type */
4605 decl_or_type = type;
4608 /* Check that the return value locations are the same. Like
4609 if we are returning floats on the 80387 register stack, we cannot
4610 make a sibcall from a function that doesn't return a float to a
4611 function that does or, conversely, from a function that does return
4612 a float to a function that doesn't; the necessary stack adjustment
4613 would not be executed. This is also the place we notice
4614 differences in the return value ABI. Note that it is ok for one
4615 of the functions to have void return type as long as the return
4616 value of the other is passed in a register. */
4617 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4618 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4620 if (STACK_REG_P (a) || STACK_REG_P (b))
4622 if (!rtx_equal_p (a, b))
4625 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4627 else if (!rtx_equal_p (a, b))
4632 /* The SYSV ABI has more call-clobbered registers;
4633 disallow sibcalls from MS to SYSV. */
4634 if (cfun->machine->call_abi == MS_ABI
4635 && ix86_function_type_abi (type) == SYSV_ABI)
4640 /* If this call is indirect, we'll need to be able to use a
4641 call-clobbered register for the address of the target function.
4642 Make sure that all such registers are not used for passing
4643 parameters. Note that DLLIMPORT functions are indirect. */
4645 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4647 if (ix86_function_regparm (type, NULL) >= 3)
4649 /* ??? Need to count the actual number of registers to be used,
4650 not the possible number of registers. Fix later. */
4656 /* Otherwise okay. That also includes certain types of indirect calls. */
4660 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4661 and "sseregparm" calling convention attributes;
4662 arguments as in struct attribute_spec.handler. */
4665 ix86_handle_cconv_attribute (tree *node, tree name,
4667 int flags ATTRIBUTE_UNUSED,
4670 if (TREE_CODE (*node) != FUNCTION_TYPE
4671 && TREE_CODE (*node) != METHOD_TYPE
4672 && TREE_CODE (*node) != FIELD_DECL
4673 && TREE_CODE (*node) != TYPE_DECL)
4675 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4677 *no_add_attrs = true;
4681 /* Can combine regparm with all attributes but fastcall. */
4682 if (is_attribute_p ("regparm", name))
4686 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4688 error ("fastcall and regparm attributes are not compatible");
4691 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4693 error ("regparam and thiscall attributes are not compatible");
4696 cst = TREE_VALUE (args);
4697 if (TREE_CODE (cst) != INTEGER_CST)
4699 warning (OPT_Wattributes,
4700 "%qE attribute requires an integer constant argument",
4702 *no_add_attrs = true;
4704 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4706 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4708 *no_add_attrs = true;
4716 /* Do not warn when emulating the MS ABI. */
4717 if ((TREE_CODE (*node) != FUNCTION_TYPE
4718 && TREE_CODE (*node) != METHOD_TYPE)
4719 || ix86_function_type_abi (*node) != MS_ABI)
4720 warning (OPT_Wattributes, "%qE attribute ignored",
4722 *no_add_attrs = true;
4726 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4727 if (is_attribute_p ("fastcall", name))
4729 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4731 error ("fastcall and cdecl attributes are not compatible");
4733 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4735 error ("fastcall and stdcall attributes are not compatible");
4737 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4739 error ("fastcall and regparm attributes are not compatible");
4741 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4743 error ("fastcall and thiscall attributes are not compatible");
4747 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4749 else if (is_attribute_p ("stdcall", name))
4751 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4753 error ("stdcall and cdecl attributes are not compatible");
4755 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4757 error ("stdcall and fastcall attributes are not compatible");
4759 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4761 error ("stdcall and thiscall attributes are not compatible");
4765 /* Can combine cdecl with regparm and sseregparm. */
4766 else if (is_attribute_p ("cdecl", name))
4768 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4770 error ("stdcall and cdecl attributes are not compatible");
4772 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4774 error ("fastcall and cdecl attributes are not compatible");
4776 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4778 error ("cdecl and thiscall attributes are not compatible");
4781 else if (is_attribute_p ("thiscall", name))
4783 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4784 warning (OPT_Wattributes, "%qE attribute is used for a non-class method",
4786 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4788 error ("stdcall and thiscall attributes are not compatible");
4790 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4792 error ("fastcall and thiscall attributes are not compatible");
4794 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4796 error ("cdecl and thiscall attributes are not compatible");
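/* Editorial example: mixing incompatible conventions, as in
     void __attribute__((fastcall, stdcall)) f (void);
   is rejected by the checks above, since each attribute handler looks
   for the conflicting names already present in TYPE_ATTRIBUTES.  */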
4800 /* Can combine sseregparm with all attributes. */
4805 /* Return 0 if the attributes for two types are incompatible, 1 if they
4806 are compatible, and 2 if they are nearly compatible (which causes a
4807 warning to be generated). */
4810 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4812 /* Check for mismatch of non-default calling convention. */
4813 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4815 if (TREE_CODE (type1) != FUNCTION_TYPE
4816 && TREE_CODE (type1) != METHOD_TYPE)
4819 /* Check for mismatched fastcall/regparm types. */
4820 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4821 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4822 || (ix86_function_regparm (type1, NULL)
4823 != ix86_function_regparm (type2, NULL)))
4826 /* Check for mismatched sseregparm types. */
4827 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4828 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4831 /* Check for mismatched thiscall types. */
4832 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4833 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4836 /* Check for mismatched return types (cdecl vs stdcall). */
4837 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4838 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4844 /* Return the regparm value for a function with the indicated TYPE and DECL.
4845 DECL may be NULL when calling function indirectly
4846 or considering a libcall. */
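/* Editorial example: on 32-bit targets
     int __attribute__((regparm (3))) f (int a, int b, int c);
   passes a, b and c in %eax, %edx and %ecx instead of on the stack.  */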
4849 ix86_function_regparm (const_tree type, const_tree decl)
4855 return (ix86_function_type_abi (type) == SYSV_ABI
4856 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4858 regparm = ix86_regparm;
4859 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4862 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4866 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4869 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4872 /* Use register calling convention for local functions when possible. */
4874 && TREE_CODE (decl) == FUNCTION_DECL
4876 && !(profile_flag && !flag_fentry))
4878 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4879 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4882 int local_regparm, globals = 0, regno;
4884 /* Make sure no regparm register is taken by a
4885 fixed register variable. */
4886 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4887 if (fixed_regs[local_regparm])
4890 /* We don't want to use regparm(3) for nested functions as
4891 these use a static chain pointer in the third argument. */
4892 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4895 /* In 32-bit mode save a register for the split stack. */
4896 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4899 /* Each fixed register usage increases register pressure,
4900 so fewer registers should be used for argument passing.
4901 This functionality can be overridden by an explicit regparm value. */
4903 for (regno = 0; regno <= DI_REG; regno++)
4904 if (fixed_regs[regno])
4908 = globals < local_regparm ? local_regparm - globals : 0;
4910 if (local_regparm > regparm)
4911 regparm = local_regparm;
4918 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4919 DFmode (2) arguments in SSE registers for a function with the
4920 indicated TYPE and DECL. DECL may be NULL when calling function
4921 indirectly or considering a libcall. Otherwise return 0. */
4924 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4926 gcc_assert (!TARGET_64BIT);
4928 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4929 by the sseregparm attribute. */
4930 if (TARGET_SSEREGPARM
4931 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4938 error ("Calling %qD with attribute sseregparm without "
4939 "SSE/SSE2 enabled", decl);
4941 error ("Calling %qT with attribute sseregparm without "
4942 "SSE/SSE2 enabled", type);
4950 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4951 (and DFmode for SSE2) arguments in SSE registers. */
4952 if (decl && TARGET_SSE_MATH && optimize
4953 && !(profile_flag && !flag_fentry))
4955 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4956 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4958 return TARGET_SSE2 ? 2 : 1;
4964 /* Return true if EAX is live at the start of the function. Used by
4965 ix86_expand_prologue to determine if we need special help before
4966 calling allocate_stack_worker. */
4969 ix86_eax_live_at_start_p (void)
4971 /* Cheat. Don't bother working forward from ix86_function_regparm
4972 to the function type to whether an actual argument is located in
4973 eax. Instead just look at cfg info, which is still close enough
4974 to correct at this point. This gives false positives for broken
4975 functions that might use uninitialized data that happens to be
4976 allocated in eax, but who cares? */
4977 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4980 /* Value is the number of bytes of arguments automatically
4981 popped when returning from a subroutine call.
4982 FUNDECL is the declaration node of the function (as a tree),
4983 FUNTYPE is the data type of the function (as a tree),
4984 or for a library call it is an identifier node for the subroutine name.
4985 SIZE is the number of bytes of arguments passed on the stack.
4987 On the 80386, the RTD insn may be used to pop them if the number
4988 of args is fixed, but if the number is variable then the caller
4989 must pop them all. RTD can't be used for library calls now
4990 because the library is compiled with the Unix compiler.
4991 Use of RTD is a selectable option, since it is incompatible with
4992 standard Unix calling sequences. If the option is not selected,
4993 the caller must always pop the args.
4995 The attribute stdcall is equivalent to RTD on a per module basis. */
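/* Editorial example: for the 32-bit function
     void __attribute__((stdcall)) f (int a, int b);
   this hook returns 8 and the callee pops its arguments with "ret $8",
   whereas a cdecl function returns 0 and leaves the popping to the
   caller.  */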
4998 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5002 /* None of the 64-bit ABIs pop arguments. */
5006 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5008 /* Cdecl functions override -mrtd, and never pop the stack. */
5009 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5011 /* Stdcall and fastcall functions will pop the stack if they do not take variable args. */
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5014 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5015 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5018 if (rtd && ! stdarg_p (funtype))
5022 /* Lose any fake structure return argument if it is passed on the stack. */
5023 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5024 && !KEEP_AGGREGATE_RETURN_POINTER)
5026 int nregs = ix86_function_regparm (funtype, fundecl);
5028 return GET_MODE_SIZE (Pmode);
5034 /* Argument support functions. */
5036 /* Return true when a register may be used to pass function parameters. */
5038 ix86_function_arg_regno_p (int regno)
5041 const int *parm_regs;
5046 return (regno < REGPARM_MAX
5047 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5049 return (regno < REGPARM_MAX
5050 || (TARGET_MMX && MMX_REGNO_P (regno)
5051 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5052 || (TARGET_SSE && SSE_REGNO_P (regno)
5053 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5058 if (SSE_REGNO_P (regno) && TARGET_SSE)
5063 if (TARGET_SSE && SSE_REGNO_P (regno)
5064 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5068 /* TODO: The function should depend on current function ABI but
5069 builtins.c would need updating then. Therefore we use the default ABI. */
5072 /* RAX is used as hidden argument to va_arg functions. */
5073 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5076 if (ix86_abi == MS_ABI)
5077 parm_regs = x86_64_ms_abi_int_parameter_registers;
5079 parm_regs = x86_64_int_parameter_registers;
5080 for (i = 0; i < (ix86_abi == MS_ABI
5081 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5082 if (regno == parm_regs[i])
5087 /* Return true if we do not know how to pass TYPE solely in registers. */
5090 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5092 if (must_pass_in_stack_var_size_or_pad (mode, type))
5095 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5096 The layout_type routine is crafty and tries to trick us into passing
5097 currently unsupported vector types on the stack by using TImode. */
5098 return (!TARGET_64BIT && mode == TImode
5099 && type && TREE_CODE (type) != VECTOR_TYPE);
5102 /* It returns the size, in bytes, of the area reserved for arguments passed
5103 in registers for the function represented by fndecl, depending on the ABI used. */
5106 ix86_reg_parm_stack_space (const_tree fndecl)
5108 enum calling_abi call_abi = SYSV_ABI;
5109 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5110 call_abi = ix86_function_abi (fndecl);
5112 call_abi = ix86_function_type_abi (fndecl);
5113 if (call_abi == MS_ABI)
5118 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the call ABI used. */
5121 ix86_function_type_abi (const_tree fntype)
5123 if (TARGET_64BIT && fntype != NULL)
5125 enum calling_abi abi = ix86_abi;
5126 if (abi == SYSV_ABI)
5128 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5131 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5139 ix86_function_ms_hook_prologue (const_tree fn)
5141 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5143 if (decl_function_context (fn) != NULL_TREE)
5144 error_at (DECL_SOURCE_LOCATION (fn),
5145 "ms_hook_prologue is not compatible with nested function");
5152 static enum calling_abi
5153 ix86_function_abi (const_tree fndecl)
5157 return ix86_function_type_abi (TREE_TYPE (fndecl));
5160 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the call ABI used. */
5163 ix86_cfun_abi (void)
5165 if (! cfun || ! TARGET_64BIT)
5167 return cfun->machine->call_abi;
5170 /* Write the extra assembler code needed to declare a function properly. */
5173 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5176 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5180 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5181 unsigned int filler_cc = 0xcccccccc;
5183 for (i = 0; i < filler_count; i += 4)
5184 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5187 ASM_OUTPUT_LABEL (asm_out_file, fname);
5189 /* Output magic byte marker, if hot-patch attribute is set. */
5194 /* leaq [%rsp + 0], %rsp */
5195 asm_fprintf (asm_out_file, ASM_BYTE
5196 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5200 /* movl.s %edi, %edi
5202 movl.s %esp, %ebp */
5203 asm_fprintf (asm_out_file, ASM_BYTE
5204 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5210 extern void init_regs (void);
5212 /* Implementation of the call ABI switching target hook. The call register
5213 sets specific to FNDECL are set up. See also CONDITIONAL_REGISTER_USAGE
5214 for more details. */
5216 ix86_call_abi_override (const_tree fndecl)
5218 if (fndecl == NULL_TREE)
5219 cfun->machine->call_abi = ix86_abi;
5221 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5224 /* MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5225 re-initialization of init_regs each time we switch function context since
5226 this is needed only during RTL expansion. */
5228 ix86_maybe_switch_abi (void)
5231 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5235 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5236 for a call to a function whose data type is FNTYPE.
5237 For a library call, FNTYPE is 0. */
5240 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5241 tree fntype, /* tree ptr for function decl */
5242 rtx libname, /* SYMBOL_REF of library name or 0 */
5245 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5246 memset (cum, 0, sizeof (*cum));
5249 cum->call_abi = ix86_function_abi (fndecl);
5251 cum->call_abi = ix86_function_type_abi (fntype);
5252 /* Set up the number of registers to use for passing arguments. */
5254 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5255 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5256 "or subtarget optimization implying it");
5257 cum->nregs = ix86_regparm;
5260 cum->nregs = (cum->call_abi == SYSV_ABI
5261 ? X86_64_REGPARM_MAX
5262 : X86_64_MS_REGPARM_MAX);
5266 cum->sse_nregs = SSE_REGPARM_MAX;
5269 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5270 ? X86_64_SSE_REGPARM_MAX
5271 : X86_64_MS_SSE_REGPARM_MAX);
5275 cum->mmx_nregs = MMX_REGPARM_MAX;
5276 cum->warn_avx = true;
5277 cum->warn_sse = true;
5278 cum->warn_mmx = true;
5280 /* Because the type might mismatch between caller and callee, we need to
5281 use actual type of function for local calls.
5282 FIXME: cgraph_analyze can be told to actually record if function uses
5283 va_start so for local functions maybe_vaarg can be made aggressive
5285 FIXME: once the type system is fixed, we won't need this code anymore. */
5287 fntype = TREE_TYPE (fndecl);
5288 cum->maybe_vaarg = (fntype
5289 ? (!prototype_p (fntype) || stdarg_p (fntype))
5294 /* If there are variable arguments, then we won't pass anything
5295 in registers in 32-bit mode. */
5296 if (stdarg_p (fntype))
5307 /* Use ecx and edx registers if function has fastcall attribute,
5308 else look for regparm information. */
5311 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5314 cum->fastcall = 1; /* Same first register as in fastcall. */
5316 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5322 cum->nregs = ix86_function_regparm (fntype, fndecl);
5325 /* Set up the number of SSE registers used for passing SFmode
5326 and DFmode arguments. Warn for mismatching ABI. */
5327 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5331 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5332 But in the case of vector types, it is some vector mode.
5334 When we have only some of our vector isa extensions enabled, then there
5335 are some modes for which vector_mode_supported_p is false. For these
5336 modes, the generic vector support in gcc will choose some non-vector mode
5337 in order to implement the type. By computing the natural mode, we'll
5338 select the proper ABI location for the operand and not depend on whatever
5339 the middle-end decides to do with these vector types.
5341 The middle-end can't deal with vector types > 16 bytes. In this
5342 case, we return the original mode and warn ABI change if CUM isn't NULL. */
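/* Editorial example: for
     typedef int v4si __attribute__((vector_size (16)));
   the natural mode is V4SImode even when SSE is disabled and the
   middle-end would fall back to a non-vector representation, so the
   argument is still given its proper ABI classification.  */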
5345 static enum machine_mode
5346 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5348 enum machine_mode mode = TYPE_MODE (type);
5350 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5352 HOST_WIDE_INT size = int_size_in_bytes (type);
5353 if ((size == 8 || size == 16 || size == 32)
5354 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5355 && TYPE_VECTOR_SUBPARTS (type) > 1)
5357 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5359 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5360 mode = MIN_MODE_VECTOR_FLOAT;
5362 mode = MIN_MODE_VECTOR_INT;
5364 /* Get the mode which has this inner mode and number of units. */
5365 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5366 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5367 && GET_MODE_INNER (mode) == innermode)
5369 if (size == 32 && !TARGET_AVX)
5371 static bool warnedavx;
5378 warning (0, "AVX vector argument without AVX "
5379 "enabled changes the ABI");
5381 return TYPE_MODE (type);
5394 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5395 this may not agree with the mode that the type system has chosen for the
5396 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5397 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5400 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5405 if (orig_mode != BLKmode)
5406 tmp = gen_rtx_REG (orig_mode, regno);
5409 tmp = gen_rtx_REG (mode, regno);
5410 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5411 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5417 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5418 of this code is to classify each 8bytes of incoming argument by the register
5419 class and assign registers accordingly. */
5421 /* Return the union class of CLASS1 and CLASS2.
5422 See the x86-64 PS ABI for details. */
5424 static enum x86_64_reg_class
5425 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5427 /* Rule #1: If both classes are equal, this is the resulting class. */
5428 if (class1 == class2)
5431 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5433 if (class1 == X86_64_NO_CLASS)
5435 if (class2 == X86_64_NO_CLASS)
5438 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5439 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5440 return X86_64_MEMORY_CLASS;
5442 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5443 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5444 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5445 return X86_64_INTEGERSI_CLASS;
5446 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5447 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5448 return X86_64_INTEGER_CLASS;
5450 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5452 if (class1 == X86_64_X87_CLASS
5453 || class1 == X86_64_X87UP_CLASS
5454 || class1 == X86_64_COMPLEX_X87_CLASS
5455 || class2 == X86_64_X87_CLASS
5456 || class2 == X86_64_X87UP_CLASS
5457 || class2 == X86_64_COMPLEX_X87_CLASS)
5458 return X86_64_MEMORY_CLASS;
5460 /* Rule #6: Otherwise class SSE is used. */
5461 return X86_64_SSE_CLASS;
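/* Editorial worked example: for
     struct s { int i; float f; };
   both fields fall into the same eightbyte; the int classifies as
   X86_64_INTEGERSI_CLASS, the float as X86_64_SSESF_CLASS, and rule #4
   above merges them to X86_64_INTEGERSI_CLASS, so the whole struct is
   passed in a general-purpose register.  */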
5464 /* Classify the argument of type TYPE and mode MODE.
5465 CLASSES will be filled by the register class used to pass each word
5466 of the operand. The number of words is returned. In case the parameter
5467 should be passed in memory, 0 is returned. As a special case for zero
5468 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5470 BIT_OFFSET is used internally for handling records and specifies the
5471 offset in bits modulo 256 to avoid overflow cases.
5473 See the x86-64 PS ABI for details.
5477 classify_argument (enum machine_mode mode, const_tree type,
5478 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5480 HOST_WIDE_INT bytes =
5481 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5482 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5484 /* Variable sized entities are always passed/returned in memory. */
5488 if (mode != VOIDmode
5489 && targetm.calls.must_pass_in_stack (mode, type))
5492 if (type && AGGREGATE_TYPE_P (type))
5496 enum x86_64_reg_class subclasses[MAX_CLASSES];
5498 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5502 for (i = 0; i < words; i++)
5503 classes[i] = X86_64_NO_CLASS;
5505 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5506 signal the memory class, so handle it as a special case. */
5509 classes[0] = X86_64_NO_CLASS;
5513 /* Classify each field of record and merge classes. */
5514 switch (TREE_CODE (type))
5517 /* And now merge the fields of the structure. */
5518 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5520 if (TREE_CODE (field) == FIELD_DECL)
5524 if (TREE_TYPE (field) == error_mark_node)
5527 /* Bitfields are always classified as integer. Handle them
5528 early, since later code would consider them to be
5529 misaligned integers. */
5530 if (DECL_BIT_FIELD (field))
5532 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5533 i < ((int_bit_position (field) + (bit_offset % 64))
5534 + tree_low_cst (DECL_SIZE (field), 0)
5537 merge_classes (X86_64_INTEGER_CLASS,
5544 type = TREE_TYPE (field);
5546 /* A flexible array member is ignored. */
5547 if (TYPE_MODE (type) == BLKmode
5548 && TREE_CODE (type) == ARRAY_TYPE
5549 && TYPE_SIZE (type) == NULL_TREE
5550 && TYPE_DOMAIN (type) != NULL_TREE
5551 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5556 if (!warned && warn_psabi)
5559 inform (input_location,
5560 "The ABI of passing struct with"
5561 " a flexible array member has"
5562 " changed in GCC 4.4");
5566 num = classify_argument (TYPE_MODE (type), type,
5568 (int_bit_position (field)
5569 + bit_offset) % 256);
5572 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5573 for (i = 0; i < num && (i + pos) < words; i++)
5575 merge_classes (subclasses[i], classes[i + pos]);
5582 /* Arrays are handled as small records. */
5585 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5586 TREE_TYPE (type), subclasses, bit_offset);
5590 /* The partial classes are now full classes. */
5591 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5592 subclasses[0] = X86_64_SSE_CLASS;
5593 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5594 && !((bit_offset % 64) == 0 && bytes == 4))
5595 subclasses[0] = X86_64_INTEGER_CLASS;
5597 for (i = 0; i < words; i++)
5598 classes[i] = subclasses[i % num];
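/* Illustrative example (not original source): a double[2] array classifies
   its element type as SSEDF; replicating subclasses over both eightbytes as
   above gives { SSEDF, SSEDF }, so the array is passed in two SSE
   registers.  */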
5603 case QUAL_UNION_TYPE:
5604 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5606 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5608 if (TREE_CODE (field) == FIELD_DECL)
5612 if (TREE_TYPE (field) == error_mark_node)
5615 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5616 TREE_TYPE (field), subclasses,
5620 for (i = 0; i < num; i++)
5621 classes[i] = merge_classes (subclasses[i], classes[i]);
5632 /* When size > 16 bytes, if the first one isn't
5633 X86_64_SSE_CLASS or any other ones aren't
5634 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5636 if (classes[0] != X86_64_SSE_CLASS)
5639 for (i = 1; i < words; i++)
5640 if (classes[i] != X86_64_SSEUP_CLASS)
5644 /* Final merger cleanup. */
5645 for (i = 0; i < words; i++)
5647 /* If one class is MEMORY, everything should be passed in memory. */
5649 if (classes[i] == X86_64_MEMORY_CLASS)
5652 /* The X86_64_SSEUP_CLASS should always be preceded by
5653 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5654 if (classes[i] == X86_64_SSEUP_CLASS
5655 && classes[i - 1] != X86_64_SSE_CLASS
5656 && classes[i - 1] != X86_64_SSEUP_CLASS)
5658 /* The first one should never be X86_64_SSEUP_CLASS. */
5659 gcc_assert (i != 0);
5660 classes[i] = X86_64_SSE_CLASS;
5663 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5664 everything should be passed in memory. */
5665 if (classes[i] == X86_64_X87UP_CLASS
5666 && (classes[i - 1] != X86_64_X87_CLASS))
5670 /* The first one should never be X86_64_X87UP_CLASS. */
5671 gcc_assert (i != 0);
5672 if (!warned && warn_psabi)
5675 inform (input_location,
5676 "The ABI of passing union with long double"
5677 " has changed in GCC 4.4");
5685 /* Compute alignment needed. We align all types to natural boundaries with
5686 the exception of XFmode, which is aligned to 64 bits. */
5687 if (mode != VOIDmode && mode != BLKmode)
5689 int mode_alignment = GET_MODE_BITSIZE (mode);
5692 mode_alignment = 128;
5693 else if (mode == XCmode)
5694 mode_alignment = 256;
5695 if (COMPLEX_MODE_P (mode))
5696 mode_alignment /= 2;
5697 /* Misaligned fields are always returned in memory. */
5698 if (bit_offset % mode_alignment)
5702 /* For V1xx modes, just use the base mode. */
5703 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5704 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5705 mode = GET_MODE_INNER (mode);
5707 /* Classification of atomic types. */
5712 classes[0] = X86_64_SSE_CLASS;
5715 classes[0] = X86_64_SSE_CLASS;
5716 classes[1] = X86_64_SSEUP_CLASS;
5726 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5730 classes[0] = X86_64_INTEGERSI_CLASS;
5733 else if (size <= 64)
5735 classes[0] = X86_64_INTEGER_CLASS;
5738 else if (size <= 64 + 32)
5740 classes[0] = X86_64_INTEGER_CLASS;
5741 classes[1] = X86_64_INTEGERSI_CLASS;
5744 else if (size <= 64 + 64)
5746 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5754 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5758 /* OImode shouldn't be used directly. */
5763 if (!(bit_offset % 64))
5764 classes[0] = X86_64_SSESF_CLASS;
5766 classes[0] = X86_64_SSE_CLASS;
5769 classes[0] = X86_64_SSEDF_CLASS;
5772 classes[0] = X86_64_X87_CLASS;
5773 classes[1] = X86_64_X87UP_CLASS;
5776 classes[0] = X86_64_SSE_CLASS;
5777 classes[1] = X86_64_SSEUP_CLASS;
5780 classes[0] = X86_64_SSE_CLASS;
5781 if (!(bit_offset % 64))
5787 if (!warned && warn_psabi)
5790 inform (input_location,
5791 "The ABI of passing structure with complex float"
5792 " member has changed in GCC 4.4");
5794 classes[1] = X86_64_SSESF_CLASS;
5798 classes[0] = X86_64_SSEDF_CLASS;
5799 classes[1] = X86_64_SSEDF_CLASS;
5802 classes[0] = X86_64_COMPLEX_X87_CLASS;
5805 /* These modes are larger than 16 bytes. */
5813 classes[0] = X86_64_SSE_CLASS;
5814 classes[1] = X86_64_SSEUP_CLASS;
5815 classes[2] = X86_64_SSEUP_CLASS;
5816 classes[3] = X86_64_SSEUP_CLASS;
5824 classes[0] = X86_64_SSE_CLASS;
5825 classes[1] = X86_64_SSEUP_CLASS;
5833 classes[0] = X86_64_SSE_CLASS;
5839 gcc_assert (VECTOR_MODE_P (mode));
5844 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5846 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5847 classes[0] = X86_64_INTEGERSI_CLASS;
5849 classes[0] = X86_64_INTEGER_CLASS;
5850 classes[1] = X86_64_INTEGER_CLASS;
5851 return 1 + (bytes > 8);
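/* Worked examples of the scalar classifications above (illustrative only):
     DImode (long)           -> { INTEGER }            one eightbyte
     TImode (__int128)       -> { INTEGER, INTEGER }   two eightbytes
     SFmode (float), aligned -> { SSESF }              one eightbyte
     XFmode (long double)    -> { X87, X87UP }         two eightbytes  */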
5855 /* Examine the argument and set the number of registers required in each
5856 class. Return 0 iff the parameter should be passed in memory. */
5858 static int examine_argument (enum machine_mode mode, const_tree type, int in_return,
5859 int *int_nregs, int *sse_nregs)
5861 enum x86_64_reg_class regclass[MAX_CLASSES];
5862 int n = classify_argument (mode, type, regclass, 0);
5868 for (n--; n >= 0; n--)
5869 switch (regclass[n])
5871 case X86_64_INTEGER_CLASS:
5872 case X86_64_INTEGERSI_CLASS:
5875 case X86_64_SSE_CLASS:
5876 case X86_64_SSESF_CLASS:
5877 case X86_64_SSEDF_CLASS:
5880 case X86_64_NO_CLASS:
5881 case X86_64_SSEUP_CLASS:
5883 case X86_64_X87_CLASS:
5884 case X86_64_X87UP_CLASS:
5888 case X86_64_COMPLEX_X87_CLASS:
5889 return in_return ? 2 : 0;
5890 case X86_64_MEMORY_CLASS:
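/* Usage sketch (an illustrative addition): for struct { long l; double d; },
   examine_argument sets *int_nregs == 1 and *sse_nregs == 1 and returns
   nonzero, so the struct is split between one GPR and one SSE register
   instead of going to memory.  */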
5896 /* Construct container for the argument used by GCC interface. See
5897 FUNCTION_ARG for the detailed description. */
5900 static rtx construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5901 const_tree type, int in_return, int nintregs, int nsseregs,
5902 const int *intreg, int sse_regno)
5904 /* The following variables hold the static issued_error state. */
5905 static bool issued_sse_arg_error;
5906 static bool issued_sse_ret_error;
5907 static bool issued_x87_ret_error;
5909 enum machine_mode tmpmode;
5911 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5912 enum x86_64_reg_class regclass[MAX_CLASSES];
5916 int needed_sseregs, needed_intregs;
5917 rtx exp[MAX_CLASSES];
5920 n = classify_argument (mode, type, regclass, 0);
5923 if (!examine_argument (mode, type, in_return, &needed_intregs,
5926 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5929 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5930 some less clueful developer tries to use floating-point anyway. */
5931 if (needed_sseregs && !TARGET_SSE)
5935 if (!issued_sse_ret_error)
5937 error ("SSE register return with SSE disabled");
5938 issued_sse_ret_error = true;
5941 else if (!issued_sse_arg_error)
5943 error ("SSE register argument with SSE disabled");
5944 issued_sse_arg_error = true;
5949 /* Likewise, error if the ABI requires us to return values in the
5950 x87 registers and the user specified -mno-80387. */
5951 if (!TARGET_80387 && in_return)
5952 for (i = 0; i < n; i++)
5953 if (regclass[i] == X86_64_X87_CLASS
5954 || regclass[i] == X86_64_X87UP_CLASS
5955 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5957 if (!issued_x87_ret_error)
5959 error ("x87 register return with x87 disabled");
5960 issued_x87_ret_error = true;
5965 /* First construct simple cases. Avoid SCmode, since we want to use
5966 a single register to pass this type. */
5967 if (n == 1 && mode != SCmode)
5968 switch (regclass[0])
5970 case X86_64_INTEGER_CLASS:
5971 case X86_64_INTEGERSI_CLASS:
5972 return gen_rtx_REG (mode, intreg[0]);
5973 case X86_64_SSE_CLASS:
5974 case X86_64_SSESF_CLASS:
5975 case X86_64_SSEDF_CLASS:
5976 if (mode != BLKmode)
5977 return gen_reg_or_parallel (mode, orig_mode,
5978 SSE_REGNO (sse_regno));
5980 case X86_64_X87_CLASS:
5981 case X86_64_COMPLEX_X87_CLASS:
5982 return gen_rtx_REG (mode, FIRST_STACK_REG);
5983 case X86_64_NO_CLASS:
5984 /* Zero sized array, struct or class. */
5989 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5990 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5991 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5993 && regclass[0] == X86_64_SSE_CLASS
5994 && regclass[1] == X86_64_SSEUP_CLASS
5995 && regclass[2] == X86_64_SSEUP_CLASS
5996 && regclass[3] == X86_64_SSEUP_CLASS
5998 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6001 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6002 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6003 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6004 && regclass[1] == X86_64_INTEGER_CLASS
6005 && (mode == CDImode || mode == TImode || mode == TFmode)
6006 && intreg[0] + 1 == intreg[1])
6007 return gen_rtx_REG (mode, intreg[0]);
6009 /* Otherwise figure out the entries of the PARALLEL. */
6010 for (i = 0; i < n; i++)
6014 switch (regclass[i])
6016 case X86_64_NO_CLASS:
6018 case X86_64_INTEGER_CLASS:
6019 case X86_64_INTEGERSI_CLASS:
6020 /* Merge TImodes on aligned occasions here too. */
6021 if (i * 8 + 8 > bytes)
6022 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6023 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6027 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6028 if (tmpmode == BLKmode)
6030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6031 gen_rtx_REG (tmpmode, *intreg),
6035 case X86_64_SSESF_CLASS:
6036 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6037 gen_rtx_REG (SFmode,
6038 SSE_REGNO (sse_regno)),
6042 case X86_64_SSEDF_CLASS:
6043 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6044 gen_rtx_REG (DFmode,
6045 SSE_REGNO (sse_regno)),
6049 case X86_64_SSE_CLASS:
6057 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6067 && regclass[1] == X86_64_SSEUP_CLASS
6068 && regclass[2] == X86_64_SSEUP_CLASS
6069 && regclass[3] == X86_64_SSEUP_CLASS);
6076 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6077 gen_rtx_REG (tmpmode,
6078 SSE_REGNO (sse_regno)),
6087 /* Empty aligned struct, union or class. */
6091 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6092 for (i = 0; i < nexps; i++)
6093 XVECEXP (ret, 0, i) = exp [i];
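/* Illustrative only: for struct { long l; double d; } the PARALLEL built
   above looks like

     (parallel [(expr_list (reg:DI rdi) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. bytes 0-7 live in %rdi and bytes 8-15 in %xmm0.  */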
6097 /* Update the data in CUM to advance over an argument of mode MODE
6098 and data type TYPE. (TYPE is null for libcalls where that information
6099 may not be available.) */
6102 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6103 const_tree type, HOST_WIDE_INT bytes,
6104 HOST_WIDE_INT words)
6120 cum->words += words;
6121 cum->nregs -= words;
6122 cum->regno += words;
6124 if (cum->nregs <= 0)
6132 /* OImode shouldn't be used directly. */
6136 if (cum->float_in_sse < 2)
6139 if (cum->float_in_sse < 1)
6156 if (!type || !AGGREGATE_TYPE_P (type))
6158 cum->sse_words += words;
6159 cum->sse_nregs -= 1;
6160 cum->sse_regno += 1;
6161 if (cum->sse_nregs <= 0)
6175 if (!type || !AGGREGATE_TYPE_P (type))
6177 cum->mmx_words += words;
6178 cum->mmx_nregs -= 1;
6179 cum->mmx_regno += 1;
6180 if (cum->mmx_nregs <= 0)
6191 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6192 const_tree type, HOST_WIDE_INT words, bool named)
6194 int int_nregs, sse_nregs;
6196 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
6197 if (!named && VALID_AVX256_REG_MODE (mode))
6200 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6201 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6203 cum->nregs -= int_nregs;
6204 cum->sse_nregs -= sse_nregs;
6205 cum->regno += int_nregs;
6206 cum->sse_regno += sse_nregs;
6210 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6211 cum->words = (cum->words + align - 1) & ~(align - 1);
6212 cum->words += words;
6217 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6218 HOST_WIDE_INT words)
6220 /* Otherwise, this should be passed indirectly. */
6221 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6223 cum->words += words;
6231 /* Update the data in CUM to advance over an argument of mode MODE and
6232 data type TYPE. (TYPE is null for libcalls where that information
6233 may not be available.) */
6236 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6237 const_tree type, bool named)
6239 HOST_WIDE_INT bytes, words;
6241 if (mode == BLKmode)
6242 bytes = int_size_in_bytes (type);
6244 bytes = GET_MODE_SIZE (mode);
6245 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6248 mode = type_natural_mode (type, NULL);
6250 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6251 function_arg_advance_ms_64 (cum, bytes, words);
6252 else if (TARGET_64BIT)
6253 function_arg_advance_64 (cum, mode, type, words, named);
6255 function_arg_advance_32 (cum, mode, type, bytes, words);
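/* A hedged standalone sketch (not part of the original source; the names
   are hypothetical) of the word bookkeeping the 64-bit path performs above,
   assuming 8-byte words.  */
#if 0
static void
example_advance_words (int *words, int bytes, int align_words)
{
  /* Round the running word count up to the argument's alignment in words,
     as function_arg_advance_64 does with cum->words...  */
  *words = (*words + align_words - 1) & ~(align_words - 1);
  /* ...then consume the argument's own words.  */
  *words += (bytes + 8 - 1) / 8;
}
#endif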
6258 /* Define where to put the arguments to a function.
6259 Value is zero to push the argument on the stack,
6260 or a hard register in which to store the argument.
6262 MODE is the argument's machine mode.
6263 TYPE is the data type of the argument (as a tree).
6264 This is null for libcalls where that information may
6266 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6267 the preceding args and about the function being called.
6268 NAMED is nonzero if this argument is a named parameter
6269 (otherwise it is an extra parameter matching an ellipsis). */
6272 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6273 enum machine_mode orig_mode, const_tree type,
6274 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6276 static bool warnedsse, warnedmmx;
6278 /* Avoid the AL settings for the Unix64 ABI. */
6279 if (mode == VOIDmode)
6295 if (words <= cum->nregs)
6297 int regno = cum->regno;
6299 /* Fastcall allocates the first two DWORD (SImode) or
6300 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6306 || (type && AGGREGATE_TYPE_P (type)))
6309 /* ECX, not EAX, is the first allocated register. */
6310 if (regno == AX_REG)
6313 return gen_rtx_REG (mode, regno);
6318 if (cum->float_in_sse < 2)
6321 if (cum->float_in_sse < 1)
6325 /* In 32-bit mode, we pass TImode in xmm registers. */
6332 if (!type || !AGGREGATE_TYPE_P (type))
6334 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6337 warning (0, "SSE vector argument without SSE enabled changes the ABI");
6341 return gen_reg_or_parallel (mode, orig_mode,
6342 cum->sse_regno + FIRST_SSE_REG);
6347 /* OImode shouldn't be used directly. */
6356 if (!type || !AGGREGATE_TYPE_P (type))
6359 return gen_reg_or_parallel (mode, orig_mode,
6360 cum->sse_regno + FIRST_SSE_REG);
6370 if (!type || !AGGREGATE_TYPE_P (type))
6372 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6375 warning (0, "MMX vector argument without MMX enabled changes the ABI");
6379 return gen_reg_or_parallel (mode, orig_mode,
6380 cum->mmx_regno + FIRST_MMX_REG);
6389 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6390 enum machine_mode orig_mode, const_tree type, bool named)
6392 /* Handle a hidden AL argument containing the number of SSE registers
6393 used by varargs x86-64 functions. */
6394 if (mode == VOIDmode)
6395 return GEN_INT (cum->maybe_vaarg
6396 ? (cum->sse_nregs < 0
6397 ? X86_64_SSE_REGPARM_MAX
6412 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
6418 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6420 &x86_64_int_parameter_registers [cum->regno],
6425 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6426 enum machine_mode orig_mode, bool named,
6427 HOST_WIDE_INT bytes)
6431 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
6432 We use the value -2 to specify that the current function call is MSABI. */
6433 if (mode == VOIDmode)
6434 return GEN_INT (-2);
6436 /* If we've run out of registers, it goes on the stack. */
6437 if (cum->nregs == 0)
6440 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6442 /* Only floating point modes are passed in anything but integer regs. */
6443 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6446 regno = cum->regno + FIRST_SSE_REG;
6451 /* Unnamed floating parameters are passed in both the
6452 SSE and integer registers. */
6453 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6454 t2 = gen_rtx_REG (mode, regno);
6455 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6456 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6457 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6460 /* Handle aggregate types passed in a register. */
6461 if (orig_mode == BLKmode)
6463 if (bytes > 0 && bytes <= 8)
6464 mode = (bytes > 4 ? DImode : SImode);
6465 if (mode == BLKmode)
6469 return gen_reg_or_parallel (mode, orig_mode, regno);
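/* Illustrative example (an addition): under the MS x64 convention each of
   the first four arguments owns a fixed slot, so for f (int, double, int,
   double) the assignments made above are

     arg0 -> %rcx    arg1 -> %xmm1    arg2 -> %r8d    arg3 -> %xmm3

   with unnamed floats duplicated into the matching integer register, as
   the PARALLEL case shows.  */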
6472 /* Return where to put the arguments to a function.
6473 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6475 MODE is the argument's machine mode. TYPE is the data type of the
6476 argument. It is null for libcalls where that information may not be
6477 available. CUM gives information about the preceding args and about
6478 the function being called. NAMED is nonzero if this argument is a
6479 named parameter (otherwise it is an extra parameter matching an
6483 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6484 const_tree type, bool named)
6486 enum machine_mode mode = omode;
6487 HOST_WIDE_INT bytes, words;
6489 if (mode == BLKmode)
6490 bytes = int_size_in_bytes (type);
6492 bytes = GET_MODE_SIZE (mode);
6493 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6495 /* To simplify the code below, represent vector types with a vector mode
6496 even if MMX/SSE are not active. */
6497 if (type && TREE_CODE (type) == VECTOR_TYPE)
6498 mode = type_natural_mode (type, cum);
6500 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6501 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6502 else if (TARGET_64BIT)
6503 return function_arg_64 (cum, mode, omode, type, named);
6505 return function_arg_32 (cum, mode, omode, type, bytes, words);
6508 /* A C expression that indicates when an argument must be passed by
6509 reference. If nonzero for an argument, a copy of that argument is
6510 made in memory and a pointer to the argument is passed instead of
6511 the argument itself. The pointer is passed in whatever way is
6512 appropriate for passing a pointer to that type. */
6515 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6516 enum machine_mode mode ATTRIBUTE_UNUSED,
6517 const_tree type, bool named ATTRIBUTE_UNUSED)
6519 /* See Windows x64 Software Convention. */
6520 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6522 int msize = (int) GET_MODE_SIZE (mode);
6525 /* Arrays are passed by reference. */
6526 if (TREE_CODE (type) == ARRAY_TYPE)
6529 if (AGGREGATE_TYPE_P (type))
6531 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6532 are passed by reference. */
6533 msize = int_size_in_bytes (type);
6537 /* __m128 is passed by reference. */
6539 case 1: case 2: case 4: case 8:
6545 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
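/* Illustrative example (an addition): with the MS x64 rules above,
   struct { char c[12]; } has size 12, which is not 1, 2, 4, or 8, so it is
   passed by reference, while struct { char c[8]; } has size 8 and is passed
   by value in a single register.  */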
6551 /* Return true when TYPE should be 128bit aligned for 32bit argument passing ABI. */
6554 contains_aligned_value_p (const_tree type)
6556 enum machine_mode mode = TYPE_MODE (type);
6557 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6561 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6563 if (TYPE_ALIGN (type) < 128)
6566 if (AGGREGATE_TYPE_P (type))
6568 /* Walk the aggregates recursively. */
6569 switch (TREE_CODE (type))
6573 case QUAL_UNION_TYPE:
6577 /* Walk all the structure fields. */
6578 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6580 if (TREE_CODE (field) == FIELD_DECL
6581 && contains_aligned_value_p (TREE_TYPE (field)))
6588 /* Just for use if some language passes arrays by value. */
6589 if (contains_aligned_value_p (TREE_TYPE (type)))
6600 /* Gives the alignment boundary, in bits, of an argument with the
6601 specified mode and type. */
6604 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6609 /* Since the main variant type is used for the call, convert the
6610 incoming type to its main variant. */
6611 type = TYPE_MAIN_VARIANT (type);
6612 align = TYPE_ALIGN (type);
6615 align = GET_MODE_ALIGNMENT (mode);
6616 if (align < PARM_BOUNDARY)
6617 align = PARM_BOUNDARY;
6618 /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
6619 natural boundaries. */
6620 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6622 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6623 make an exception for SSE modes since these require 128bit alignment.
6626 The handling here differs from field_alignment. ICC aligns MMX
6627 arguments to 4 byte boundaries, while structure fields are aligned
6628 to 8 byte boundaries. */
6631 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6632 align = PARM_BOUNDARY;
6636 if (!contains_aligned_value_p (type))
6637 align = PARM_BOUNDARY;
6640 if (align > BIGGEST_ALIGNMENT)
6641 align = BIGGEST_ALIGNMENT;
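/* Worked example (illustrative only): on 32-bit, a double argument gets
   PARM_BOUNDARY (32-bit) alignment, while an SSE __m128 argument keeps its
   natural 128-bit boundary per the exception above.  */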
6645 /* Return true if N is a possible register number of function value. */
6648 ix86_function_value_regno_p (const unsigned int regno)
6655 case FIRST_FLOAT_REG:
6656 /* TODO: The function should depend on current function ABI but
6657 builtins.c would need updating then. Therefore we use the default ABI. */
6659 if (TARGET_64BIT && ix86_abi == MS_ABI)
6661 return TARGET_FLOAT_RETURNS_IN_80387;
6667 if (TARGET_MACHO || TARGET_64BIT)
6675 /* Define how to find the value returned by a function.
6676 VALTYPE is the data type of the value (as a tree).
6677 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6678 otherwise, FUNC is 0. */
6681 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6682 const_tree fntype, const_tree fn)
6686 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6687 we normally prevent this case when mmx is not available. However
6688 some ABIs may require the result to be returned like DImode. */
6689 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6690 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6692 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6693 we prevent this case when sse is not available. However some ABIs
6694 may require the result to be returned like integer TImode. */
6695 else if (mode == TImode
6696 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6697 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6699 /* 32-byte vector modes in %ymm0. */
6700 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6701 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6703 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6704 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6705 regno = FIRST_FLOAT_REG;
6707 /* Most things go in %eax. */
6710 /* Override FP return register with %xmm0 for local functions when
6711 SSE math is enabled or for functions with sseregparm attribute. */
6712 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6714 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6715 if ((sse_level >= 1 && mode == SFmode)
6716 || (sse_level == 2 && mode == DFmode))
6717 regno = FIRST_SSE_REG;
6720 /* OImode shouldn't be used directly. */
6721 gcc_assert (mode != OImode);
6723 return gen_rtx_REG (orig_mode, regno);
6727 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6732 /* Handle libcalls, which don't provide a type node. */
6733 if (valtype == NULL)
6745 return gen_rtx_REG (mode, FIRST_SSE_REG);
6748 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6752 return gen_rtx_REG (mode, AX_REG);
6756 ret = construct_container (mode, orig_mode, valtype, 1,
6757 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6758 x86_64_int_return_registers, 0);
6760 /* For zero sized structures, construct_container returns NULL, but we
6761 need to keep the rest of the compiler happy by returning a meaningful value. */
6763 ret = gen_rtx_REG (orig_mode, AX_REG);
6769 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6771 unsigned int regno = AX_REG;
6775 switch (GET_MODE_SIZE (mode))
6778 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6779 && !COMPLEX_MODE_P (mode))
6780 regno = FIRST_SSE_REG;
6784 if (mode == SFmode || mode == DFmode)
6785 regno = FIRST_SSE_REG;
6791 return gen_rtx_REG (orig_mode, regno);
6795 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6796 enum machine_mode orig_mode, enum machine_mode mode)
6798 const_tree fn, fntype;
6801 if (fntype_or_decl && DECL_P (fntype_or_decl))
6802 fn = fntype_or_decl;
6803 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6805 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6806 return function_value_ms_64 (orig_mode, mode);
6807 else if (TARGET_64BIT)
6808 return function_value_64 (orig_mode, mode, valtype);
6810 return function_value_32 (orig_mode, mode, fntype, fn);
6814 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6815 bool outgoing ATTRIBUTE_UNUSED)
6817 enum machine_mode mode, orig_mode;
6819 orig_mode = TYPE_MODE (valtype);
6820 mode = type_natural_mode (valtype, NULL);
6821 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6825 ix86_libcall_value (enum machine_mode mode)
6827 return ix86_function_value_1 (NULL, NULL, mode, mode);
6830 /* Return true iff type is returned in memory. */
6832 static bool ATTRIBUTE_UNUSED
6833 return_in_memory_32 (const_tree type, enum machine_mode mode)
6837 if (mode == BLKmode)
6840 size = int_size_in_bytes (type);
6842 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6845 if (VECTOR_MODE_P (mode) || mode == TImode)
6847 /* User-created vectors small enough to fit in EAX. */
6851 /* MMX/3dNow values are returned in MM0,
6852 except when it doesn't exist. */
6856 /* SSE values are returned in XMM0, except when it doesn't exist. */
6860 /* AVX values are returned in YMM0, except when it doesn't exist. */
6871 /* OImode shouldn't be used directly. */
6872 gcc_assert (mode != OImode);
6877 static bool ATTRIBUTE_UNUSED
6878 return_in_memory_64 (const_tree type, enum machine_mode mode)
6880 int needed_intregs, needed_sseregs;
6881 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6884 static bool ATTRIBUTE_UNUSED
6885 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6887 HOST_WIDE_INT size = int_size_in_bytes (type);
6889 /* __m128 is returned in xmm0. */
6890 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6891 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6894 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6895 return size != 1 && size != 2 && size != 4 && size != 8;
6899 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6901 #ifdef SUBTARGET_RETURN_IN_MEMORY
6902 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6904 const enum machine_mode mode = type_natural_mode (type, NULL);
6908 if (ix86_function_type_abi (fntype) == MS_ABI)
6909 return return_in_memory_ms_64 (type, mode);
6911 return return_in_memory_64 (type, mode);
6914 return return_in_memory_32 (type, mode);
6918 /* Return false iff TYPE is returned in memory. This version is used
6919 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6920 but differs notably in that when MMX is available, 8-byte vectors
6921 are returned in memory, rather than in MMX registers. */
6924 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6927 enum machine_mode mode = type_natural_mode (type, NULL);
6930 return return_in_memory_64 (type, mode);
6932 if (mode == BLKmode)
6935 size = int_size_in_bytes (type);
6937 if (VECTOR_MODE_P (mode))
6939 /* Return in memory only if MMX registers *are* available. This
6940 seems backwards, but it is consistent with the existing Solaris x86 ABI. */
6947 else if (mode == TImode)
6949 else if (mode == XFmode)
6955 /* When returning SSE vector types, we have a choice of either
6956 (1) being abi incompatible with a -march switch, or
6957 (2) generating an error.
6958 Given no good solution, I think the safest thing is one warning.
6959 The user won't be able to use -Werror, but....
6961 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6962 called in response to actually generating a caller or callee that
6963 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6964 via aggregate_value_p for general type probing from tree-ssa. */
6967 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6969 static bool warnedsse, warnedmmx;
6971 if (!TARGET_64BIT && type)
6973 /* Look at the return type of the function, not the function type. */
6974 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6976 if (!TARGET_SSE && !warnedsse)
6979 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6982 warning (0, "SSE vector return without SSE enabled changes the ABI");
6987 if (!TARGET_MMX && !warnedmmx)
6989 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6992 warning (0, "MMX vector return without MMX enabled changes the ABI");
7002 /* Create the va_list data type. */
7004 /* Returns the calling convention specific va_list data type.
7005 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7008 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7010 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7012 /* For i386 we use a plain pointer to the argument area. */
7013 if (!TARGET_64BIT || abi == MS_ABI)
7014 return build_pointer_type (char_type_node);
7016 record = lang_hooks.types.make_type (RECORD_TYPE);
7017 type_decl = build_decl (BUILTINS_LOCATION,
7018 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7020 f_gpr = build_decl (BUILTINS_LOCATION,
7021 FIELD_DECL, get_identifier ("gp_offset"),
7022 unsigned_type_node);
7023 f_fpr = build_decl (BUILTINS_LOCATION,
7024 FIELD_DECL, get_identifier ("fp_offset"),
7025 unsigned_type_node);
7026 f_ovf = build_decl (BUILTINS_LOCATION,
7027 FIELD_DECL, get_identifier ("overflow_arg_area"),
7029 f_sav = build_decl (BUILTINS_LOCATION,
7030 FIELD_DECL, get_identifier ("reg_save_area"),
7033 va_list_gpr_counter_field = f_gpr;
7034 va_list_fpr_counter_field = f_fpr;
7036 DECL_FIELD_CONTEXT (f_gpr) = record;
7037 DECL_FIELD_CONTEXT (f_fpr) = record;
7038 DECL_FIELD_CONTEXT (f_ovf) = record;
7039 DECL_FIELD_CONTEXT (f_sav) = record;
7041 TREE_CHAIN (record) = type_decl;
7042 TYPE_NAME (record) = type_decl;
7043 TYPE_FIELDS (record) = f_gpr;
7044 DECL_CHAIN (f_gpr) = f_fpr;
7045 DECL_CHAIN (f_fpr) = f_ovf;
7046 DECL_CHAIN (f_ovf) = f_sav;
7048 layout_type (record);
7050 /* The correct type is an array type of one element. */
7051 return build_array_type (record, build_index_type (size_zero_node));
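/* For reference, a hedged sketch (an addition, per the x86-64 psABI) of the
   C-level type the record built above corresponds to:

     typedef struct __va_list_tag {
       unsigned int gp_offset;      -- byte offset into reg_save_area of
                                       the next GPR argument
       unsigned int fp_offset;      -- byte offset of the next SSE argument
       void *overflow_arg_area;     -- next stack-passed argument
       void *reg_save_area;         -- start of the register save area
     } va_list[1];  */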
7054 /* Set up the builtin va_list data type and, for 64-bit, the additional
7055 calling convention specific va_list data types. */
7058 ix86_build_builtin_va_list (void)
7060 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7062 /* Initialize abi specific va_list builtin types. */
7066 if (ix86_abi == MS_ABI)
7068 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7069 if (TREE_CODE (t) != RECORD_TYPE)
7070 t = build_variant_type_copy (t);
7071 sysv_va_list_type_node = t;
7076 if (TREE_CODE (t) != RECORD_TYPE)
7077 t = build_variant_type_copy (t);
7078 sysv_va_list_type_node = t;
7080 if (ix86_abi != MS_ABI)
7082 t = ix86_build_builtin_va_list_abi (MS_ABI);
7083 if (TREE_CODE (t) != RECORD_TYPE)
7084 t = build_variant_type_copy (t);
7085 ms_va_list_type_node = t;
7090 if (TREE_CODE (t) != RECORD_TYPE)
7091 t = build_variant_type_copy (t);
7092 ms_va_list_type_node = t;
7099 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7102 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7108 /* GPR size of varargs save area. */
7109 if (cfun->va_list_gpr_size)
7110 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7112 ix86_varargs_gpr_size = 0;
7114 /* FPR size of varargs save area. We don't need it if we don't pass
7115 anything in SSE registers. */
7116 if (TARGET_SSE && cfun->va_list_fpr_size)
7117 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7119 ix86_varargs_fpr_size = 0;
7121 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7124 save_area = frame_pointer_rtx;
7125 set = get_varargs_alias_set ();
7127 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7128 if (max > X86_64_REGPARM_MAX)
7129 max = X86_64_REGPARM_MAX;
7131 for (i = cum->regno; i < max; i++)
7133 mem = gen_rtx_MEM (Pmode,
7134 plus_constant (save_area, i * UNITS_PER_WORD));
7135 MEM_NOTRAP_P (mem) = 1;
7136 set_mem_alias_set (mem, set);
7137 emit_move_insn (mem, gen_rtx_REG (Pmode,
7138 x86_64_int_parameter_registers[i]));
7141 if (ix86_varargs_fpr_size)
7143 enum machine_mode smode;
7146 /* Now emit code to save SSE registers. The AX parameter contains the
7147 number of SSE parameter registers used to call this function, though all we
7148 actually check here is the zero/non-zero status. */
7150 label = gen_label_rtx ();
7151 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7152 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7155 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7156 we used movdqa (i.e. TImode) instead? Perhaps even better would
7157 be if we could determine the real mode of the data, via a hook
7158 into pass_stdarg. Ignore all that for now. */
7160 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7161 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7163 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7164 if (max > X86_64_SSE_REGPARM_MAX)
7165 max = X86_64_SSE_REGPARM_MAX;
7167 for (i = cum->sse_regno; i < max; ++i)
7169 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7170 mem = gen_rtx_MEM (smode, mem);
7171 MEM_NOTRAP_P (mem) = 1;
7172 set_mem_alias_set (mem, set);
7173 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7175 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
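/* Layout sketch of the save area filled above (illustrative only): the
   first 8 * X86_64_REGPARM_MAX bytes hold the six GPRs (%rdi, %rsi, %rdx,
   %rcx, %r8, %r9 at 8-byte strides), followed by the SSE registers at
   16-byte strides, which matches the n_fpr * 16 + 8 * X86_64_REGPARM_MAX
   fp_offset convention used by va_start below.  */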
7183 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7185 alias_set_type set = get_varargs_alias_set ();
7188 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7192 mem = gen_rtx_MEM (Pmode,
7193 plus_constant (virtual_incoming_args_rtx,
7194 i * UNITS_PER_WORD));
7195 MEM_NOTRAP_P (mem) = 1;
7196 set_mem_alias_set (mem, set);
7198 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7199 emit_move_insn (mem, reg);
7204 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7205 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7208 CUMULATIVE_ARGS next_cum;
7211 /* This argument doesn't appear to be used anymore. Which is good,
7212 because the old code here didn't suppress rtl generation. */
7213 gcc_assert (!no_rtl);
7218 fntype = TREE_TYPE (current_function_decl);
7220 /* For varargs, we do not want to skip the dummy va_dcl argument.
7221 For stdargs, we do want to skip the last named argument. */
7223 if (stdarg_p (fntype))
7224 ix86_function_arg_advance (&next_cum, mode, type, true);
7226 if (cum->call_abi == MS_ABI)
7227 setup_incoming_varargs_ms_64 (&next_cum);
7229 setup_incoming_varargs_64 (&next_cum);
7232 /* Checks if TYPE is of kind va_list char *. */
7235 is_va_list_char_pointer (tree type)
7239 /* For 32-bit it is always true. */
7242 canonic = ix86_canonical_va_list_type (type);
7243 return (canonic == ms_va_list_type_node
7244 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7247 /* Implement va_start. */
7250 ix86_va_start (tree valist, rtx nextarg)
7252 HOST_WIDE_INT words, n_gpr, n_fpr;
7253 tree f_gpr, f_fpr, f_ovf, f_sav;
7254 tree gpr, fpr, ovf, sav, t;
7259 if (flag_split_stack
7260 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7262 unsigned int scratch_regno;
7264 /* When we are splitting the stack, we can't refer to the stack
7265 arguments using internal_arg_pointer, because they may be on
7266 the old stack. The split stack prologue will arrange to
7267 leave a pointer to the old stack arguments in a scratch
7268 register, which we here copy to a pseudo-register. The split
7269 stack prologue can't set the pseudo-register directly because
7270 it (the prologue) runs before any registers have been saved. */
7272 scratch_regno = split_stack_prologue_scratch_regno ();
7273 if (scratch_regno != INVALID_REGNUM)
7277 reg = gen_reg_rtx (Pmode);
7278 cfun->machine->split_stack_varargs_pointer = reg;
7281 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7285 push_topmost_sequence ();
7286 emit_insn_after (seq, entry_of_function ());
7287 pop_topmost_sequence ();
7291 /* Only the 64-bit target needs something special. */
7292 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7294 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7295 std_expand_builtin_va_start (valist, nextarg);
7300 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7301 next = expand_binop (ptr_mode, add_optab,
7302 cfun->machine->split_stack_varargs_pointer,
7303 crtl->args.arg_offset_rtx,
7304 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7305 convert_move (va_r, next, 0);
7310 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7311 f_fpr = DECL_CHAIN (f_gpr);
7312 f_ovf = DECL_CHAIN (f_fpr);
7313 f_sav = DECL_CHAIN (f_ovf);
7315 valist = build_simple_mem_ref (valist);
7316 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7317 /* The following should be folded into the MEM_REF offset. */
7318 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7320 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7322 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7324 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7327 /* Count number of gp and fp argument registers used. */
7328 words = crtl->args.info.words;
7329 n_gpr = crtl->args.info.regno;
7330 n_fpr = crtl->args.info.sse_regno;
7332 if (cfun->va_list_gpr_size)
7334 type = TREE_TYPE (gpr);
7335 t = build2 (MODIFY_EXPR, type,
7336 gpr, build_int_cst (type, n_gpr * 8));
7337 TREE_SIDE_EFFECTS (t) = 1;
7338 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7341 if (TARGET_SSE && cfun->va_list_fpr_size)
7343 type = TREE_TYPE (fpr);
7344 t = build2 (MODIFY_EXPR, type, fpr,
7345 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7346 TREE_SIDE_EFFECTS (t) = 1;
7347 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7350 /* Find the overflow area. */
7351 type = TREE_TYPE (ovf);
7352 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7353 ovf_rtx = crtl->args.internal_arg_pointer;
7355 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7356 t = make_tree (type, ovf_rtx);
7358 t = build2 (POINTER_PLUS_EXPR, type, t,
7359 size_int (words * UNITS_PER_WORD));
7360 t = build2 (MODIFY_EXPR, type, ovf, t);
7361 TREE_SIDE_EFFECTS (t) = 1;
7362 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7364 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7366 /* Find the register save area.
7367 The function prologue saves it right above the stack frame. */
7368 type = TREE_TYPE (sav);
7369 t = make_tree (type, frame_pointer_rtx);
7370 if (!ix86_varargs_gpr_size)
7371 t = build2 (POINTER_PLUS_EXPR, type, t,
7372 size_int (-8 * X86_64_REGPARM_MAX));
7373 t = build2 (MODIFY_EXPR, type, sav, t);
7374 TREE_SIDE_EFFECTS (t) = 1;
7375 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
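/* A hedged C-level sketch (an addition; "ap" is hypothetical) of what the
   expansion above computes for f (int a, ...) after one GPR and no SSE
   registers were consumed by named arguments:

     ap->gp_offset = 1 * 8;            -- n_gpr * 8
     ap->fp_offset = 0 * 16 + 8 * 6;   -- n_fpr * 16 + 8 * X86_64_REGPARM_MAX
     ap->overflow_arg_area = first stack-passed argument;
     ap->reg_save_area = register save area;  */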
7379 /* Implement va_arg. */
7382 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7385 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7386 tree f_gpr, f_fpr, f_ovf, f_sav;
7387 tree gpr, fpr, ovf, sav, t;
7389 tree lab_false, lab_over = NULL_TREE;
7394 enum machine_mode nat_mode;
7395 unsigned int arg_boundary;
7397 /* Only the 64-bit target needs something special. */
7398 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7399 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7401 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7402 f_fpr = DECL_CHAIN (f_gpr);
7403 f_ovf = DECL_CHAIN (f_fpr);
7404 f_sav = DECL_CHAIN (f_ovf);
7406 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7407 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7408 valist = build_va_arg_indirect_ref (valist);
7409 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7410 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7411 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7413 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7415 type = build_pointer_type (type);
7416 size = int_size_in_bytes (type);
7417 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7419 nat_mode = type_natural_mode (type, NULL);
7428 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
7429 if (ix86_cfun_abi () == SYSV_ABI)
7436 container = construct_container (nat_mode, TYPE_MODE (type),
7437 type, 0, X86_64_REGPARM_MAX,
7438 X86_64_SSE_REGPARM_MAX, intreg,
7443 /* Pull the value out of the saved registers. */
7445 addr = create_tmp_var (ptr_type_node, "addr");
7449 int needed_intregs, needed_sseregs;
7451 tree int_addr, sse_addr;
7453 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7454 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7456 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7458 need_temp = (!REG_P (container)
7459 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7460 || TYPE_ALIGN (type) > 128));
7462 /* If we are passing a structure, verify that it is a consecutive block
7463 in the register save area. If not, we need to do moves. */
7464 if (!need_temp && !REG_P (container))
7466 /* Verify that all registers are strictly consecutive. */
7467 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7471 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7473 rtx slot = XVECEXP (container, 0, i);
7474 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7475 || INTVAL (XEXP (slot, 1)) != i * 16)
7483 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7485 rtx slot = XVECEXP (container, 0, i);
7486 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7487 || INTVAL (XEXP (slot, 1)) != i * 8)
7499 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7500 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7503 /* First ensure that we fit completely in registers. */
7506 t = build_int_cst (TREE_TYPE (gpr),
7507 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7508 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7509 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7510 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7511 gimplify_and_add (t, pre_p);
7515 t = build_int_cst (TREE_TYPE (fpr),
7516 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7517 + X86_64_REGPARM_MAX * 8);
7518 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7519 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7520 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7521 gimplify_and_add (t, pre_p);
7524 /* Compute index to start of area used for integer regs. */
7527 /* int_addr = gpr + sav; */
7528 t = fold_convert (sizetype, gpr);
7529 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7530 gimplify_assign (int_addr, t, pre_p);
7534 /* sse_addr = fpr + sav; */
7535 t = fold_convert (sizetype, fpr);
7536 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7537 gimplify_assign (sse_addr, t, pre_p);
7541 int i, prev_size = 0;
7542 tree temp = create_tmp_var (type, "va_arg_tmp");
7545 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7546 gimplify_assign (addr, t, pre_p);
7548 for (i = 0; i < XVECLEN (container, 0); i++)
7550 rtx slot = XVECEXP (container, 0, i);
7551 rtx reg = XEXP (slot, 0);
7552 enum machine_mode mode = GET_MODE (reg);
7558 tree dest_addr, dest;
7559 int cur_size = GET_MODE_SIZE (mode);
7561 if (prev_size + cur_size > size)
7563 cur_size = size - prev_size;
7564 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7565 if (mode == BLKmode)
7568 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7569 if (mode == GET_MODE (reg))
7570 addr_type = build_pointer_type (piece_type);
7572 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7574 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7577 if (SSE_REGNO_P (REGNO (reg)))
7579 src_addr = sse_addr;
7580 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7584 src_addr = int_addr;
7585 src_offset = REGNO (reg) * 8;
7587 src_addr = fold_convert (addr_type, src_addr);
7588 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7589 size_int (src_offset));
7591 dest_addr = fold_convert (daddr_type, addr);
7592 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7593 size_int (INTVAL (XEXP (slot, 1))));
7594 if (cur_size == GET_MODE_SIZE (mode))
7596 src = build_va_arg_indirect_ref (src_addr);
7597 dest = build_va_arg_indirect_ref (dest_addr);
7599 gimplify_assign (dest, src, pre_p);
7604 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7605 3, dest_addr, src_addr,
7606 size_int (cur_size));
7607 gimplify_and_add (copy, pre_p);
7609 prev_size += cur_size;
7615 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7616 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7617 gimplify_assign (gpr, t, pre_p);
7622 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7623 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7624 gimplify_assign (fpr, t, pre_p);
7627 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7629 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7632 /* ... otherwise out of the overflow area. */
7634 /* When we align a parameter on the stack for the caller, if its
7635 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7636 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
7637 here with the caller. */
7638 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7639 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7640 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7642 /* Care for on-stack alignment if needed. */
7643 if (arg_boundary <= 64 || size == 0)
7647 HOST_WIDE_INT align = arg_boundary / 8;
7648 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7649 size_int (align - 1));
7650 t = fold_convert (sizetype, t);
7651 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7653 t = fold_convert (TREE_TYPE (ovf), t);
7656 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7657 gimplify_assign (addr, t, pre_p);
7659 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7660 size_int (rsize * UNITS_PER_WORD));
7661 gimplify_assign (unshare_expr (ovf), t, pre_p);
7664 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7666 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7667 addr = fold_convert (ptrtype, addr);
7670 addr = build_va_arg_indirect_ref (addr);
7671 return build_va_arg_indirect_ref (addr);
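/* A hedged C-level sketch (an addition; "ap" is hypothetical) of the
   register/stack split gimplified above for one plain integer argument:

     if (ap->gp_offset <= (6 - 1) * 8) {
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
     } else {
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     }
     result = *(int *) addr;  */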
7674 /* Return true if OPNUM's MEM should be matched
7675 in movabs* patterns. */
7678 ix86_check_movabs (rtx insn, int opnum)
7682 set = PATTERN (insn);
7683 if (GET_CODE (set) == PARALLEL)
7684 set = XVECEXP (set, 0, 0);
7685 gcc_assert (GET_CODE (set) == SET);
7686 mem = XEXP (set, opnum);
7687 while (GET_CODE (mem) == SUBREG)
7688 mem = SUBREG_REG (mem);
7689 gcc_assert (MEM_P (mem));
7690 return volatile_ok || !MEM_VOLATILE_P (mem);
7693 /* Initialize the table of extra 80387 mathematical constants. */
7696 init_ext_80387_constants (void)
7698 static const char * cst[5] =
7700 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7701 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7702 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7703 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7704 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7708 for (i = 0; i < 5; i++)
7710 real_from_string (&ext_80387_constants_table[i], cst[i]);
7711 /* Ensure each constant is rounded to XFmode precision. */
7712 real_convert (&ext_80387_constants_table[i],
7713 XFmode, &ext_80387_constants_table[i]);
7716 ext_80387_constants_init = 1;
7719 /* Return non-zero if the constant is something that
7720 can be loaded with a special instruction. */
7723 standard_80387_constant_p (rtx x)
7725 enum machine_mode mode = GET_MODE (x);
7729 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7732 if (x == CONST0_RTX (mode))
7734 if (x == CONST1_RTX (mode))
7737 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7739 /* For XFmode constants, try to find a special 80387 instruction when
7740 optimizing for size or on those CPUs that benefit from them. */
7742 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7746 if (! ext_80387_constants_init)
7747 init_ext_80387_constants ();
7749 for (i = 0; i < 5; i++)
7750 if (real_identical (&r, &ext_80387_constants_table[i]))
7754 /* A load of the constant -0.0 or -1.0 will be split into an
7755 fldz;fchs or fld1;fchs sequence. */
7756 if (real_isnegzero (&r))
7758 if (real_identical (&r, &dconstm1))
7764 /* Return the opcode of the special instruction to be used to load the constant X. */
7768 standard_80387_constant_opcode (rtx x)
7770 switch (standard_80387_constant_p (x))
7794 /* Return the CONST_DOUBLE representing the 80387 constant that is
7795 loaded by the specified special instruction. The argument IDX
7796 matches the return value from standard_80387_constant_p. */
7799 standard_80387_constant_rtx (int idx)
7803 if (! ext_80387_constants_init)
7804 init_ext_80387_constants ();
7820 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7824 /* Return 1 if X is all 0s and 2 if X is all 1s
7825 in a supported SSE vector mode. */
7828 standard_sse_constant_p (rtx x)
7830 enum machine_mode mode = GET_MODE (x);
7832 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7834 if (vector_all_ones_operand (x, mode))
7850 /* Return the opcode of the special instruction to be used to load the constant X. */
7854 standard_sse_constant_opcode (rtx insn, rtx x)
7856 switch (standard_sse_constant_p (x))
7859 switch (get_attr_mode (insn))
7862 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7864 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7865 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7867 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7869 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7870 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7872 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7874 return "vxorps\t%x0, %x0, %x0";
7876 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7877 return "vxorps\t%x0, %x0, %x0";
7879 return "vxorpd\t%x0, %x0, %x0";
7881 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7882 return "vxorps\t%x0, %x0, %x0";
7884 return "vpxor\t%x0, %x0, %x0";
7889 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
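/* Usage note (an illustrative addition): the all-zeros constant is loaded
   with a self-XOR such as "xorps %xmm0, %xmm0" and the all-ones constant
   with a self-compare such as "pcmpeqd %xmm0, %xmm0"; both idioms avoid a
   constant-pool load.  */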
7896 /* Returns true if OP contains a symbol reference. */
7899 symbolic_reference_mentioned_p (rtx op)
7904 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7907 fmt = GET_RTX_FORMAT (GET_CODE (op));
7908 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7914 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7915 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7919 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7926 /* Return true if it is appropriate to emit `ret' instructions in the
7927 body of a function. Do this only if the epilogue is simple, needing a
7928 couple of insns. Prior to reloading, we can't tell how many registers
7929 must be saved, so return false then. Return false if there is no frame
7930 marker to de-allocate. */
7933 ix86_can_use_return_insn_p (void)
7935 struct ix86_frame frame;
7937 if (! reload_completed || frame_pointer_needed)
7940 /* Don't allow more than 32k pop, since that's all we can do
7941 with one instruction. */
7942 if (crtl->args.pops_args && crtl->args.size >= 32768)
7945 ix86_compute_frame_layout (&frame);
7946 return (frame.stack_pointer_offset == UNITS_PER_WORD
7947 && (frame.nregs + frame.nsseregs) == 0);
7950 /* Value should be nonzero if functions must have frame pointers.
7951 Zero means the frame pointer need not be set up (and parms may
7952 be accessed via the stack pointer) in functions that seem suitable. */
7955 ix86_frame_pointer_required (void)
7957 /* If we accessed previous frames, then the generated code expects
7958 to be able to access the saved ebp value in our frame. */
7959 if (cfun->machine->accesses_prev_frame)
7962 /* Several x86 OSes need a frame pointer for other reasons,
7963 usually pertaining to setjmp. */
7964 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7967 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7968 turns off the frame pointer by default. Turn it back on now if
7969 we've not got a leaf function. */
7970 if (TARGET_OMIT_LEAF_FRAME_POINTER
7971 && (!current_function_is_leaf
7972 || ix86_current_function_calls_tls_descriptor))
7975 if (crtl->profile && !flag_fentry)
7981 /* Record that the current function accesses previous call frames. */
7984 ix86_setup_frame_addresses (void)
7986 cfun->machine->accesses_prev_frame = 1;
7989 #ifndef USE_HIDDEN_LINKONCE
7990 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7991 # define USE_HIDDEN_LINKONCE 1
7993 # define USE_HIDDEN_LINKONCE 0
7997 static int pic_labels_used;
7999 /* Fills in the label name that should be used for a pc thunk for
8000 the given register. */
8003 get_pc_thunk_name (char name[32], unsigned int regno)
8005 gcc_assert (!TARGET_64BIT);
8007 if (USE_HIDDEN_LINKONCE)
8008 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8010 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8014 /* This function generates code for -fpic that loads %ebx with
8015 the return address of the caller and then returns. */
8018 ix86_code_end (void)
8023 for (regno = AX_REG; regno <= SP_REG; regno++)
8028 if (!(pic_labels_used & (1 << regno)))
8031 get_pc_thunk_name (name, regno);
8033 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8034 get_identifier (name),
8035 build_function_type (void_type_node, void_list_node));
8036 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8037 NULL_TREE, void_type_node);
8038 TREE_PUBLIC (decl) = 1;
8039 TREE_STATIC (decl) = 1;
8044 switch_to_section (darwin_sections[text_coal_section]);
8045 fputs ("\t.weak_definition\t", asm_out_file);
8046 assemble_name (asm_out_file, name);
8047 fputs ("\n\t.private_extern\t", asm_out_file);
8048 assemble_name (asm_out_file, name);
8049 putc ('\n', asm_out_file);
8050 ASM_OUTPUT_LABEL (asm_out_file, name);
8051 DECL_WEAK (decl) = 1;
8055 if (USE_HIDDEN_LINKONCE)
8057 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8059 targetm.asm_out.unique_section (decl, 0);
8060 switch_to_section (get_named_section (decl, NULL, 0));
8062 targetm.asm_out.globalize_label (asm_out_file, name);
8063 fputs ("\t.hidden\t", asm_out_file);
8064 assemble_name (asm_out_file, name);
8065 putc ('\n', asm_out_file);
8066 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8070 switch_to_section (text_section);
8071 ASM_OUTPUT_LABEL (asm_out_file, name);
8074 DECL_INITIAL (decl) = make_node (BLOCK);
8075 current_function_decl = decl;
8076 init_function_start (decl);
8077 first_function_block_is_cold = false;
8078 /* Make sure unwind info is emitted for the thunk if needed. */
8079 final_start_function (emit_barrier (), asm_out_file, 1);
8081 /* Pad stack IP move with 4 instructions (two NOPs count
8082 as one instruction). */
8083 if (TARGET_PAD_SHORT_FUNCTION)
8088 fputs ("\tnop\n", asm_out_file);
8091 xops[0] = gen_rtx_REG (Pmode, regno);
8092 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8093 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8094 fputs ("\tret\n", asm_out_file);
8095 final_end_function ();
8096 init_insn_lengths ();
8097 free_after_compilation (cfun);
8099 current_function_decl = NULL;
8102 if (flag_split_stack)
8103 file_end_indicate_split_stack ();
8106 /* Emit code for the SET_GOT patterns. */
8109 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8115 if (TARGET_VXWORKS_RTP && flag_pic)
8117 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8118 xops[2] = gen_rtx_MEM (Pmode,
8119 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8120 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8122 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8123 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8124 an unadorned address. */
8125 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8126 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8127 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8131 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8133 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8135 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8138 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8141 output_asm_insn ("call\t%a2", xops);
8142 #ifdef DWARF2_UNWIND_INFO
8143 /* The call to the next label acts as a push. */
8144 if (dwarf2out_do_frame ())
8148 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8149 gen_rtx_PLUS (Pmode,
8152 RTX_FRAME_RELATED_P (insn) = 1;
8153 dwarf2out_frame_debug (insn, true);
8160 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8161 is what will be referenced by the Mach-O PIC subsystem. */
8163 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8166 targetm.asm_out.internal_label (asm_out_file, "L",
8167 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8171 output_asm_insn ("pop%z0\t%0", xops);
8172 #ifdef DWARF2_UNWIND_INFO
8173 /* The pop is a pop and clobbers dest, but doesn't restore it
8174 for unwind info purposes. */
8175 if (dwarf2out_do_frame ())
8179 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8180 dwarf2out_frame_debug (insn, true);
8181 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8182 gen_rtx_PLUS (Pmode,
8185 RTX_FRAME_RELATED_P (insn) = 1;
8186 dwarf2out_frame_debug (insn, true);
8195 get_pc_thunk_name (name, REGNO (dest));
8196 pic_labels_used |= 1 << REGNO (dest);
8198 #ifdef DWARF2_UNWIND_INFO
8199 /* Ensure all queued register saves are flushed before the
8200 call. */
8201 if (dwarf2out_do_frame ())
8202 dwarf2out_flush_queued_reg_saves ();
8204 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8205 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8206 output_asm_insn ("call\t%X2", xops);
8207 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8208 is what will be referenced by the Mach-O PIC subsystem. */
8211 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8213 targetm.asm_out.internal_label (asm_out_file, "L",
8214 CODE_LABEL_NUMBER (label));
8221 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8222 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8224 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8229 /* Generate a "push" pattern for input ARG. */
8234 struct machine_function *m = cfun->machine;
8236 if (m->fs.cfa_reg == stack_pointer_rtx)
8237 m->fs.cfa_offset += UNITS_PER_WORD;
8238 m->fs.sp_offset += UNITS_PER_WORD;
8240 return gen_rtx_SET (VOIDmode,
8242 gen_rtx_PRE_DEC (Pmode,
8243 stack_pointer_rtx)),
8247 /* Generate a "pop" pattern for input ARG. */
8252 return gen_rtx_SET (VOIDmode,
8255 gen_rtx_POST_INC (Pmode,
8256 stack_pointer_rtx)));
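/* Illustration (not part of GCC): a minimal standalone sketch, under
   hypothetical values, of the frame-state bookkeeping that gen_push
   performs above.  Pushing one word moves the stack pointer down, so the
   tracked SP offset grows by a word, and so does the CFA offset while the
   CFA register is still the stack pointer.  */
#include <assert.h>

struct example_frame_state
{
  long cfa_offset;	/* offset of the CFA above the stack pointer */
  long sp_offset;	/* tracked SP-relative frame offset */
  int cfa_is_sp;	/* the CFA register is still the stack pointer */
};

static void
example_push_bookkeeping (struct example_frame_state *fs, long word)
{
  if (fs->cfa_is_sp)
    fs->cfa_offset += word;
  fs->sp_offset += word;
}

static void
example_push_selftest (void)
{
  struct example_frame_state fs = { 4, 4, 1 }; /* just past the return addr */

  example_push_bookkeeping (&fs, 4);	/* e.g. push %ebp, 32-bit word */
  assert (fs.sp_offset == 8 && fs.cfa_offset == 8);
}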
8259 /* Return >= 0 if there is an unused call-clobbered register available
8260 for the entire function. */
8263 ix86_select_alt_pic_regnum (void)
8265 if (current_function_is_leaf
8267 && !ix86_current_function_calls_tls_descriptor)
8270 /* Can't use the same register for both PIC and DRAP. */
8272 drap = REGNO (crtl->drap_reg);
8275 for (i = 2; i >= 0; --i)
8276 if (i != drap && !df_regs_ever_live_p (i))
8280 return INVALID_REGNUM;
8283 /* Return 1 if we need to save REGNO. */
8285 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8287 if (pic_offset_table_rtx
8288 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8289 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8291 || crtl->calls_eh_return
8292 || crtl->uses_const_pool))
8294 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8299 if (crtl->calls_eh_return && maybe_eh_return)
8304 unsigned test = EH_RETURN_DATA_REGNO (i);
8305 if (test == INVALID_REGNUM)
8312 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8315 return (df_regs_ever_live_p (regno)
8316 && !call_used_regs[regno]
8317 && !fixed_regs[regno]
8318 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
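/* Illustration (not part of GCC): the final test of ix86_save_reg above,
   modeled in isolation with hypothetical flags; the PIC, eh_return and
   DRAP special cases earlier in the function are deliberately omitted.  */
#include <assert.h>

static int
example_must_save_reg (int ever_live, int call_used, int fixed,
		       int is_hard_fp, int fp_needed)
{
  return (ever_live
	  && !call_used
	  && !fixed
	  && (!is_hard_fp || !fp_needed));
}

static void
example_must_save_reg_selftest (void)
{
  assert (example_must_save_reg (1, 0, 0, 0, 0));	/* live callee-saved */
  assert (!example_must_save_reg (1, 1, 0, 0, 0));	/* call-clobbered */
  assert (!example_must_save_reg (1, 0, 0, 1, 1));	/* %ebp, fp needed */
}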
8321 /* Return number of saved general purpose registers. */
8324 ix86_nsaved_regs (void)
8329 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8330 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8335 /* Return number of saved SSE registers. */
8338 ix86_nsaved_sseregs (void)
8343 if (ix86_cfun_abi () != MS_ABI)
8345 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8346 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8351 /* Given FROM and TO register numbers, say whether this elimination is
8352 allowed. If stack alignment is needed, we can only replace argument
8353 pointer with hard frame pointer, or replace frame pointer with stack
8354 pointer. Otherwise, frame pointer elimination is automatically
8355 handled and all other eliminations are valid. */
8358 ix86_can_eliminate (const int from, const int to)
8360 if (stack_realign_fp)
8361 return ((from == ARG_POINTER_REGNUM
8362 && to == HARD_FRAME_POINTER_REGNUM)
8363 || (from == FRAME_POINTER_REGNUM
8364 && to == STACK_POINTER_REGNUM));
8366 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8369 /* Return the offset between two registers, one to be eliminated, and the other
8370 its replacement, at the start of a routine. */
8373 ix86_initial_elimination_offset (int from, int to)
8375 struct ix86_frame frame;
8376 ix86_compute_frame_layout (&frame);
8378 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8379 return frame.hard_frame_pointer_offset;
8380 else if (from == FRAME_POINTER_REGNUM
8381 && to == HARD_FRAME_POINTER_REGNUM)
8382 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8385 gcc_assert (to == STACK_POINTER_REGNUM);
8387 if (from == ARG_POINTER_REGNUM)
8388 return frame.stack_pointer_offset;
8390 gcc_assert (from == FRAME_POINTER_REGNUM);
8391 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8395 /* In a dynamically-aligned function, we can't know the offset from
8396 stack pointer to frame pointer, so we must ensure that setjmp
8397 eliminates fp against the hard fp (%ebp) rather than trying to
8398 index from %esp up to the top of the frame across a gap that is
8399 of unknown (at compile-time) size. */
8401 ix86_builtin_setjmp_frame_value (void)
8403 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8406 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8407 field in the TCB, so they cannot be used together. */
8410 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8414 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8416 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8423 /* When using -fsplit-stack, the allocation routines set a field in
8424 the TCB to the bottom of the stack plus this much space, measured
8425 in bytes. */
8427 #define SPLIT_STACK_AVAILABLE 256
8429 /* Fill the structure ix86_frame describing the frame of the current function. */
8432 ix86_compute_frame_layout (struct ix86_frame *frame)
8434 unsigned int stack_alignment_needed;
8435 HOST_WIDE_INT offset;
8436 unsigned int preferred_alignment;
8437 HOST_WIDE_INT size = get_frame_size ();
8438 HOST_WIDE_INT to_allocate;
8440 frame->nregs = ix86_nsaved_regs ();
8441 frame->nsseregs = ix86_nsaved_sseregs ();
8443 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8444 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8446 /* The MS ABI seems to require stack alignment to always be 16 except for
8447 function prologues and leaf functions. */
8448 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8449 && (!current_function_is_leaf || cfun->calls_alloca != 0
8450 || ix86_current_function_calls_tls_descriptor))
8452 preferred_alignment = 16;
8453 stack_alignment_needed = 16;
8454 crtl->preferred_stack_boundary = 128;
8455 crtl->stack_alignment_needed = 128;
8458 gcc_assert (!size || stack_alignment_needed);
8459 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8460 gcc_assert (preferred_alignment <= stack_alignment_needed);
8462 /* During reload iteration the number of registers saved can change.
8463 Recompute the value as needed. Do not recompute when the number of
8464 registers didn't change, as reload makes multiple calls to this function
8465 and does not expect the decision to change within a single iteration. */
8466 if (!optimize_function_for_size_p (cfun)
8467 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8469 int count = frame->nregs;
8470 struct cgraph_node *node = cgraph_node (current_function_decl);
8472 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8473 /* The fast prologue uses move instead of push to save registers. This
8474 is significantly longer, but also executes faster as modern hardware
8475 can execute the moves in parallel, but can't do that for push/pop.
8477 Be careful about choosing which prologue to emit: when the function
8478 takes many instructions to execute we may as well use the slow version,
8479 and likewise when the function is known to be outside a hot spot (this
8480 is known with feedback only). Weight the size of the function by the
8481 number of registers to save, as it is cheap to use one or two push
8482 instructions but very slow to use many of them. */
8484 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8485 if (node->frequency < NODE_FREQUENCY_NORMAL
8486 || (flag_branch_probabilities
8487 && node->frequency < NODE_FREQUENCY_HOT))
8488 cfun->machine->use_fast_prologue_epilogue = false;
8490 cfun->machine->use_fast_prologue_epilogue
8491 = !expensive_function_p (count);
8493 if (TARGET_PROLOGUE_USING_MOVE
8494 && cfun->machine->use_fast_prologue_epilogue)
8495 frame->save_regs_using_mov = true;
8497 frame->save_regs_using_mov = false;
8499 /* If static stack checking is enabled and done with probes, the registers
8500 need to be saved before allocating the frame. */
8501 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8502 frame->save_regs_using_mov = false;
8504 /* Skip return address. */
8505 offset = UNITS_PER_WORD;
8507 /* Skip pushed static chain. */
8508 if (ix86_static_chain_on_stack)
8509 offset += UNITS_PER_WORD;
8511 /* Skip saved base pointer. */
8512 if (frame_pointer_needed)
8513 offset += UNITS_PER_WORD;
8515 frame->hard_frame_pointer_offset = offset;
8517 /* Register save area */
8518 offset += frame->nregs * UNITS_PER_WORD;
8519 frame->reg_save_offset = offset;
8521 /* Align and set SSE register save area. */
8522 if (frame->nsseregs)
8524 /* The only ABI that has saved SSE registers (Win64) also has a
8525 16-byte aligned default stack, and thus we don't need to be
8526 within the re-aligned local stack frame to save them. */
8527 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8528 offset = (offset + 16 - 1) & -16;
8529 offset += frame->nsseregs * 16;
8531 frame->sse_reg_save_offset = offset;
8533 /* The re-aligned stack starts here. Values before this point are not
8534 directly comparable with values below this point. In order to make
8535 sure that no value happens to be the same before and after, force
8536 the alignment computation below to add a non-zero value. */
8537 if (stack_realign_fp)
8538 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8541 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8542 offset += frame->va_arg_size;
8544 /* Align start of frame for local function. */
8545 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8547 /* Frame pointer points here. */
8548 frame->frame_pointer_offset = offset;
8552 /* Add outgoing arguments area. Can be skipped if we eliminated
8553 all the function calls as dead code.
8554 Skipping is however impossible when the function calls alloca. The alloca
8555 expander assumes that the last crtl->outgoing_args_size bytes
8556 of the stack frame are unused. */
8557 if (ACCUMULATE_OUTGOING_ARGS
8558 && (!current_function_is_leaf || cfun->calls_alloca
8559 || ix86_current_function_calls_tls_descriptor))
8561 offset += crtl->outgoing_args_size;
8562 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8565 frame->outgoing_arguments_size = 0;
8567 /* Align stack boundary. Only needed if we're calling another function
8568 or using alloca. */
8569 if (!current_function_is_leaf || cfun->calls_alloca
8570 || ix86_current_function_calls_tls_descriptor)
8571 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8573 /* We've reached end of stack frame. */
8574 frame->stack_pointer_offset = offset;
8576 /* Size prologue needs to allocate. */
8577 to_allocate = offset - frame->sse_reg_save_offset;
8579 if ((!to_allocate && frame->nregs <= 1)
8580 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8581 frame->save_regs_using_mov = false;
8583 if (ix86_using_red_zone ()
8584 && current_function_sp_is_unchanging
8585 && current_function_is_leaf
8586 && !ix86_current_function_calls_tls_descriptor)
8588 frame->red_zone_size = to_allocate;
8589 if (frame->save_regs_using_mov)
8590 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8591 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8592 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8595 frame->red_zone_size = 0;
8596 frame->stack_pointer_offset -= frame->red_zone_size;
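/* Illustration (not part of GCC): the layout code above repeatedly rounds
   an offset up to a power-of-two alignment with the idiom
   (offset + align - 1) & -align.  A standalone check of the idiom:  */
#include <assert.h>

static long
example_align_up (long offset, long align)
{
  /* ALIGN must be a power of two, so -align is an all-ones mask above
     the alignment bits.  */
  return (offset + align - 1) & -align;
}

static void
example_align_up_selftest (void)
{
  assert (example_align_up (20, 16) == 32);	/* rounds up */
  assert (example_align_up (32, 16) == 32);	/* already aligned */
  assert (example_align_up (0, 16) == 0);
}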
8599 /* This is semi-inlined memory_address_length, but simplified
8600 since we know that we're always dealing with reg+offset, and
8601 to avoid having to create and discard all that rtl. */
8604 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8610 /* EBP and R13 cannot be encoded without an offset. */
8611 len = (regno == BP_REG || regno == R13_REG);
8613 else if (IN_RANGE (offset, -128, 127))
8616 /* ESP and R12 must be encoded with a SIB byte. */
8617 if (regno == SP_REG || regno == R12_REG)
8623 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8624 The valid base registers are taken from CFUN->MACHINE->FS. */
8627 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8629 const struct machine_function *m = cfun->machine;
8630 rtx base_reg = NULL;
8631 HOST_WIDE_INT base_offset = 0;
8633 if (m->use_fast_prologue_epilogue)
8635 /* Choose the base register most likely to allow the most scheduling
8636 opportunities. Generally FP is valid throughout the function,
8637 while DRAP must be reloaded within the epilogue. But choose either
8638 over the SP due to increased encoding size. */
8642 base_reg = hard_frame_pointer_rtx;
8643 base_offset = m->fs.fp_offset - cfa_offset;
8645 else if (m->fs.drap_valid)
8647 base_reg = crtl->drap_reg;
8648 base_offset = 0 - cfa_offset;
8650 else if (m->fs.sp_valid)
8652 base_reg = stack_pointer_rtx;
8653 base_offset = m->fs.sp_offset - cfa_offset;
8658 HOST_WIDE_INT toffset;
8661 /* Choose the base register with the smallest address encoding.
8662 With a tie, choose FP > DRAP > SP. */
8665 base_reg = stack_pointer_rtx;
8666 base_offset = m->fs.sp_offset - cfa_offset;
8667 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8669 if (m->fs.drap_valid)
8671 toffset = 0 - cfa_offset;
8672 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8675 base_reg = crtl->drap_reg;
8676 base_offset = toffset;
8682 toffset = m->fs.fp_offset - cfa_offset;
8683 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8686 base_reg = hard_frame_pointer_rtx;
8687 base_offset = toffset;
8692 gcc_assert (base_reg != NULL);
8694 return plus_constant (base_reg, base_offset);
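/* Illustration (not part of GCC): a standalone model of the encoding
   lengths computed by choose_baseaddr_len above -- no displacement byte
   for a plain base register, one byte for a disp8 in [-128, 127], four
   for a disp32, plus a SIB byte for an ESP-like base and a mandatory
   displacement for an EBP-like base.  The register numbers below are
   hypothetical stand-ins.  */
#include <assert.h>

enum { EXAMPLE_SP = 4, EXAMPLE_BP = 5 };

static int
example_baseaddr_len (int regno, long offset)
{
  int len;

  if (offset == 0)
    /* An EBP-like base cannot be encoded without a displacement.  */
    len = (regno == EXAMPLE_BP);
  else if (offset >= -128 && offset <= 127)
    len = 1;			/* disp8 */
  else
    len = 4;			/* disp32 */

  /* An ESP-like base must be encoded with a SIB byte.  */
  if (regno == EXAMPLE_SP)
    len++;

  return len;
}

static void
example_baseaddr_len_selftest (void)
{
  assert (example_baseaddr_len (0, 0) == 0);		/* plain base */
  assert (example_baseaddr_len (EXAMPLE_BP, 0) == 1);	/* forced disp */
  assert (example_baseaddr_len (EXAMPLE_SP, 8) == 2);	/* disp8 + SIB */
  assert (example_baseaddr_len (0, 1000) == 4);		/* disp32 */
}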
8697 /* Emit code to save registers in the prologue. */
8700 ix86_emit_save_regs (void)
8705 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8706 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8708 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8709 RTX_FRAME_RELATED_P (insn) = 1;
8713 /* Emit a single register save at CFA - CFA_OFFSET. */
8716 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8717 HOST_WIDE_INT cfa_offset)
8719 struct machine_function *m = cfun->machine;
8720 rtx reg = gen_rtx_REG (mode, regno);
8721 rtx mem, addr, base, insn;
8723 addr = choose_baseaddr (cfa_offset);
8724 mem = gen_frame_mem (mode, addr);
8726 /* For SSE saves, we need to indicate the 128-bit alignment. */
8727 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8729 insn = emit_move_insn (mem, reg);
8730 RTX_FRAME_RELATED_P (insn) = 1;
8733 if (GET_CODE (base) == PLUS)
8734 base = XEXP (base, 0);
8735 gcc_checking_assert (REG_P (base));
8737 /* When saving registers into a re-aligned local stack frame, avoid
8738 any tricky guessing by dwarf2out. */
8739 if (m->fs.realigned)
8741 gcc_checking_assert (stack_realign_drap);
8743 if (regno == REGNO (crtl->drap_reg))
8745 /* A bit of a hack. We force the DRAP register to be saved in
8746 the re-aligned stack frame, which provides us with a copy
8747 of the CFA that will last past the prologue. Install it. */
8748 gcc_checking_assert (cfun->machine->fs.fp_valid);
8749 addr = plus_constant (hard_frame_pointer_rtx,
8750 cfun->machine->fs.fp_offset - cfa_offset);
8751 mem = gen_rtx_MEM (mode, addr);
8752 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8756 /* The frame pointer is a stable reference within the
8757 aligned frame. Use it. */
8758 gcc_checking_assert (cfun->machine->fs.fp_valid);
8759 addr = plus_constant (hard_frame_pointer_rtx,
8760 cfun->machine->fs.fp_offset - cfa_offset);
8761 mem = gen_rtx_MEM (mode, addr);
8762 add_reg_note (insn, REG_CFA_EXPRESSION,
8763 gen_rtx_SET (VOIDmode, mem, reg));
8767 /* The memory may not be relative to the current CFA register,
8768 which means that we may need to generate a new pattern for
8769 use by the unwind info. */
8770 else if (base != m->fs.cfa_reg)
8772 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8773 mem = gen_rtx_MEM (mode, addr);
8774 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8778 /* Emit code to save registers using MOV insns.
8779 First register is stored at CFA - CFA_OFFSET. */
8781 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8785 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8786 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8788 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8789 cfa_offset -= UNITS_PER_WORD;
8793 /* Emit code to save SSE registers using MOV insns.
8794 First register is stored at CFA - CFA_OFFSET. */
8796 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8800 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8801 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8803 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8808 static GTY(()) rtx queued_cfa_restores;
8810 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8811 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8812 Don't add the note if the previously saved value will be left untouched
8813 within the stack red zone until return, as unwinders can find the same value
8814 in the register and on the stack. */
8817 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8819 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8824 add_reg_note (insn, REG_CFA_RESTORE, reg);
8825 RTX_FRAME_RELATED_P (insn) = 1;
8829 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8832 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
8835 ix86_add_queued_cfa_restore_notes (rtx insn)
8838 if (!queued_cfa_restores)
8840 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8842 XEXP (last, 1) = REG_NOTES (insn);
8843 REG_NOTES (insn) = queued_cfa_restores;
8844 queued_cfa_restores = NULL_RTX;
8845 RTX_FRAME_RELATED_P (insn) = 1;
8848 /* Expand prologue or epilogue stack adjustment.
8849 The pattern exists to put a dependency on all ebp-based memory accesses.
8850 STYLE should be negative if instructions should be marked as frame related,
8851 zero if the %r11 register is live and cannot be freely used, and positive
8852 otherwise. */
8855 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8856 int style, bool set_cfa)
8858 struct machine_function *m = cfun->machine;
8862 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
8863 else if (x86_64_immediate_operand (offset, DImode))
8864 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
8868 /* r11 is used by indirect sibcall return as well, set before the
8869 epilogue and used after the epilogue. */
8871 tmp = gen_rtx_REG (DImode, R11_REG);
8874 gcc_assert (src != hard_frame_pointer_rtx
8875 && dest != hard_frame_pointer_rtx);
8876 tmp = hard_frame_pointer_rtx;
8878 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8880 RTX_FRAME_RELATED_P (insn) = 1;
8882 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
8885 insn = emit_insn (insn);
8887 ix86_add_queued_cfa_restore_notes (insn);
8893 gcc_assert (m->fs.cfa_reg == src);
8894 m->fs.cfa_offset += INTVAL (offset);
8895 m->fs.cfa_reg = dest;
8897 r = gen_rtx_PLUS (Pmode, src, offset);
8898 r = gen_rtx_SET (VOIDmode, dest, r);
8899 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8900 RTX_FRAME_RELATED_P (insn) = 1;
8903 RTX_FRAME_RELATED_P (insn) = 1;
8905 if (dest == stack_pointer_rtx)
8907 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8908 bool valid = m->fs.sp_valid;
8910 if (src == hard_frame_pointer_rtx)
8912 valid = m->fs.fp_valid;
8913 ooffset = m->fs.fp_offset;
8915 else if (src == crtl->drap_reg)
8917 valid = m->fs.drap_valid;
8922 /* Else there are two possibilities: SP itself, which we set
8923 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
8924 taken care of by hand along the eh_return path. */
8925 gcc_checking_assert (src == stack_pointer_rtx
8926 || offset == const0_rtx);
8929 m->fs.sp_offset = ooffset - INTVAL (offset);
8930 m->fs.sp_valid = valid;
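/* Illustration (not part of GCC): the sp_offset update above in
   isolation, with hypothetical values.  A stack allocation uses a
   negative OFFSET, so subtracting it grows the tracked SP offset; a
   positive epilogue adjustment shrinks it back.  */
#include <assert.h>

static void
example_sp_offset_selftest (void)
{
  long sp_offset = 8;		/* current offset of SP below the CFA */

  sp_offset -= -16;		/* prologue: sub $16, %esp */
  assert (sp_offset == 24);

  sp_offset -= 16;		/* epilogue: add $16, %esp */
  assert (sp_offset == 8);
}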
8934 /* Find an available register to be used as dynamic realign argument
8935 pointer register. Such a register will be written in the prologue and
8936 used at the beginning of the body, so it must not be
8937 1. parameter passing register.
8939 We reuse the static-chain register if it is available. Otherwise, we
8940 use DI for i386 and R13 for x86-64. We chose R13 since it has
8943 Return: the regno of the chosen register. */
8946 find_drap_reg (void)
8948 tree decl = cfun->decl;
8952 /* Use R13 for a nested function or a function that needs a static chain.
8953 Since a function with a tail call may use any caller-saved
8954 register in the epilogue, DRAP must not use a caller-saved
8955 register in that case. */
8956 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8963 /* Use DI for a nested function or a function that needs a static chain.
8964 Since a function with a tail call may use any caller-saved
8965 register in the epilogue, DRAP must not use a caller-saved
8966 register in that case. */
8967 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8970 /* Reuse the static chain register if it isn't used for parameter
8972 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8973 && !lookup_attribute ("fastcall",
8974 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8975 && !lookup_attribute ("thiscall",
8976 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8983 /* Return minimum incoming stack alignment. */
8986 ix86_minimum_incoming_stack_boundary (bool sibcall)
8988 unsigned int incoming_stack_boundary;
8990 /* Prefer the one specified at command line. */
8991 if (ix86_user_incoming_stack_boundary)
8992 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8993 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8994 if -mstackrealign is used, this isn't for a sibcall check, and the
8995 estimated stack alignment is 128 bits. */
8998 && ix86_force_align_arg_pointer
8999 && crtl->stack_alignment_estimated == 128)
9000 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9002 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9004 /* Incoming stack alignment can be changed on individual functions
9005 via force_align_arg_pointer attribute. We use the smallest
9006 incoming stack boundary. */
9007 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9008 && lookup_attribute (ix86_force_align_arg_pointer_string,
9009 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9010 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9012 /* The incoming stack frame has to be aligned at least at
9013 parm_stack_boundary. */
9014 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9015 incoming_stack_boundary = crtl->parm_stack_boundary;
9017 /* Stack at entrance of main is aligned by runtime. We use the
9018 smallest incoming stack boundary. */
9019 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9020 && DECL_NAME (current_function_decl)
9021 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9022 && DECL_FILE_SCOPE_P (current_function_decl))
9023 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9025 return incoming_stack_boundary;
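/* Illustration (not part of GCC): a simplified standalone sketch of the
   clamps applied above, with hypothetical boundary values in bits.  The
   command-line value wins if given, the result is raised to the parameter
   boundary, and main is capped at the runtime-provided alignment; the
   force_align_arg_pointer attribute case is deliberately omitted.  */
#include <assert.h>

static unsigned int
example_min_incoming_boundary (unsigned int user, unsigned int deflt,
			       unsigned int parm, int is_main,
			       unsigned int main_boundary)
{
  unsigned int b = user ? user : deflt;

  if (b < parm)
    b = parm;			/* at least parm_stack_boundary */
  if (is_main && b > main_boundary)
    b = main_boundary;		/* the runtime aligns main's stack */
  return b;
}

static void
example_min_incoming_boundary_selftest (void)
{
  assert (example_min_incoming_boundary (0, 32, 64, 0, 128) == 64);
  assert (example_min_incoming_boundary (256, 32, 64, 1, 128) == 128);
}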
9028 /* Update incoming stack boundary and estimated stack alignment. */
9031 ix86_update_stack_boundary (void)
9033 ix86_incoming_stack_boundary
9034 = ix86_minimum_incoming_stack_boundary (false);
9036 /* x86_64 vararg needs 16-byte stack alignment for the register save
9040 && crtl->stack_alignment_estimated < 128)
9041 crtl->stack_alignment_estimated = 128;
9044 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9045 needed or an rtx for DRAP otherwise. */
9048 ix86_get_drap_rtx (void)
9050 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9051 crtl->need_drap = true;
9053 if (stack_realign_drap)
9055 /* Assign DRAP to vDRAP and return vDRAP. */
9056 unsigned int regno = find_drap_reg ();
9061 arg_ptr = gen_rtx_REG (Pmode, regno);
9062 crtl->drap_reg = arg_ptr;
9065 drap_vreg = copy_to_reg (arg_ptr);
9069 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9072 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9073 RTX_FRAME_RELATED_P (insn) = 1;
9081 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9084 ix86_internal_arg_pointer (void)
9086 return virtual_incoming_args_rtx;
9089 struct scratch_reg {
9094 /* Return a short-lived scratch register for use on function entry.
9095 In 32-bit mode, it is valid only after the registers are saved
9096 in the prologue. This register must be released by means of
9097 release_scratch_register_on_entry once it is dead. */
9100 get_scratch_register_on_entry (struct scratch_reg *sr)
9108 /* We always use R11 in 64-bit mode. */
9113 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9115 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9116 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9117 int regparm = ix86_function_regparm (fntype, decl);
9119 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9121 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9122 for the static chain register. */
9123 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9124 && drap_regno != AX_REG)
9126 else if (regparm < 2 && drap_regno != DX_REG)
9128 /* ecx is the static chain register. */
9129 else if (regparm < 3 && !fastcall_p && !static_chain_p
9130 && drap_regno != CX_REG)
9132 else if (ix86_save_reg (BX_REG, true))
9134 /* esi is the static chain register. */
9135 else if (!(regparm == 3 && static_chain_p)
9136 && ix86_save_reg (SI_REG, true))
9138 else if (ix86_save_reg (DI_REG, true))
9142 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9147 sr->reg = gen_rtx_REG (Pmode, regno);
9150 rtx insn = emit_insn (gen_push (sr->reg));
9151 RTX_FRAME_RELATED_P (insn) = 1;
9155 /* Release a scratch register obtained from the preceding function. */
9158 release_scratch_register_on_entry (struct scratch_reg *sr)
9162 rtx x, insn = emit_insn (gen_pop (sr->reg));
9164 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9165 RTX_FRAME_RELATED_P (insn) = 1;
9166 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9167 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9168 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9172 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9174 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9177 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9179 /* We skip the probe for the first interval + a small dope of 4 words and
9180 probe that many bytes past the specified size to maintain a protection
9181 area at the bottom of the stack. */
9182 const int dope = 4 * UNITS_PER_WORD;
9183 rtx size_rtx = GEN_INT (size);
9185 /* See if we have a constant small number of probes to generate. If so,
9186 that's the easy case. The run-time loop is made up of 11 insns in the
9187 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9188 for n # of intervals. */
9189 if (size <= 5 * PROBE_INTERVAL)
9191 HOST_WIDE_INT i, adjust;
9192 bool first_probe = true;
9194 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9195 values of N from 1 until it exceeds SIZE. If only one probe is
9196 needed, this will not generate any code. Then adjust and probe
9197 to PROBE_INTERVAL + SIZE. */
9198 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9202 adjust = 2 * PROBE_INTERVAL + dope;
9203 first_probe = false;
9206 adjust = PROBE_INTERVAL;
9208 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9209 plus_constant (stack_pointer_rtx, -adjust)));
9210 emit_stack_probe (stack_pointer_rtx);
9214 adjust = size + PROBE_INTERVAL + dope;
9216 adjust = size + PROBE_INTERVAL - i;
9218 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9219 plus_constant (stack_pointer_rtx, -adjust)));
9220 emit_stack_probe (stack_pointer_rtx);
9222 /* Adjust back to account for the additional first interval. */
9223 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9224 plus_constant (stack_pointer_rtx,
9225 PROBE_INTERVAL + dope)));
9228 /* Otherwise, do the same as above, but in a loop. Note that we must be
9229 extra careful with variables wrapping around because we might be at
9230 the very top (or the very bottom) of the address space and we have
9231 to be able to handle this case properly; in particular, we use an
9232 equality test for the loop condition. */
9235 HOST_WIDE_INT rounded_size;
9236 struct scratch_reg sr;
9238 get_scratch_register_on_entry (&sr);
9241 /* Step 1: round SIZE to the previous multiple of the interval. */
9243 rounded_size = size & -PROBE_INTERVAL;
9246 /* Step 2: compute initial and final value of the loop counter. */
9248 /* SP = SP_0 + PROBE_INTERVAL. */
9249 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9250 plus_constant (stack_pointer_rtx,
9251 - (PROBE_INTERVAL + dope))));
9253 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9254 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9255 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9256 gen_rtx_PLUS (Pmode, sr.reg,
9257 stack_pointer_rtx)));
9262 while (SP != LAST_ADDR)
9264 SP = SP + PROBE_INTERVAL
9268 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9269 values of N from 1 until it is equal to ROUNDED_SIZE. */
9271 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9274 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9275 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9277 if (size != rounded_size)
9279 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9280 plus_constant (stack_pointer_rtx,
9281 rounded_size - size)));
9282 emit_stack_probe (stack_pointer_rtx);
9285 /* Adjust back to account for the additional first interval. */
9286 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9287 plus_constant (stack_pointer_rtx,
9288 PROBE_INTERVAL + dope)));
9290 release_scratch_register_on_entry (&sr);
9293 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9294 cfun->machine->fs.sp_offset += size;
9296 /* Make sure nothing is scheduled before we are done. */
9297 emit_insn (gen_blockage ());
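/* Illustration (not part of GCC): for a constant SIZE within
   5*PROBE_INTERVAL, the unrolled case above emits one adjust-and-probe
   per full interval below SIZE plus a final one at PROBE_INTERVAL + SIZE.
   A standalone count of the probes, assuming a hypothetical 4K
   interval:  */
#include <assert.h>

enum { EXAMPLE_PROBE_INTERVAL = 4096 };

static int
example_count_probes (long size)
{
  int n = 1;			/* final probe at PROBE_INTERVAL + SIZE */
  long i;

  for (i = EXAMPLE_PROBE_INTERVAL; i < size; i += EXAMPLE_PROBE_INTERVAL)
    n++;			/* one probe per full interval */
  return n;
}

static void
example_count_probes_selftest (void)
{
  assert (example_count_probes (1000) == 1);
  assert (example_count_probes (3 * EXAMPLE_PROBE_INTERVAL) == 3);
}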
9300 /* Adjust the stack pointer up to REG while probing it. */
9303 output_adjust_stack_and_probe (rtx reg)
9305 static int labelno = 0;
9306 char loop_lab[32], end_lab[32];
9309 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9310 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9312 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9314 /* Jump to END_LAB if SP == LAST_ADDR. */
9315 xops[0] = stack_pointer_rtx;
9317 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9318 fputs ("\tje\t", asm_out_file);
9319 assemble_name_raw (asm_out_file, end_lab);
9320 fputc ('\n', asm_out_file);
9322 /* SP = SP + PROBE_INTERVAL. */
9323 xops[1] = GEN_INT (PROBE_INTERVAL);
9324 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9327 xops[1] = const0_rtx;
9328 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9330 fprintf (asm_out_file, "\tjmp\t");
9331 assemble_name_raw (asm_out_file, loop_lab);
9332 fputc ('\n', asm_out_file);
9334 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9339 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9340 inclusive. These are offsets from the current stack pointer. */
9343 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9345 /* See if we have a constant small number of probes to generate. If so,
9346 that's the easy case. The run-time loop is made up of 7 insns in the
9347 generic case while the compile-time loop is made up of n insns for n #
9348 of intervals. */
9349 if (size <= 7 * PROBE_INTERVAL)
9353 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9354 it exceeds SIZE. If only one probe is needed, this will not
9355 generate any code. Then probe at FIRST + SIZE. */
9356 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9357 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9359 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9362 /* Otherwise, do the same as above, but in a loop. Note that we must be
9363 extra careful with variables wrapping around because we might be at
9364 the very top (or the very bottom) of the address space and we have
9365 to be able to handle this case properly; in particular, we use an
9366 equality test for the loop condition. */
9369 HOST_WIDE_INT rounded_size, last;
9370 struct scratch_reg sr;
9372 get_scratch_register_on_entry (&sr);
9375 /* Step 1: round SIZE to the previous multiple of the interval. */
9377 rounded_size = size & -PROBE_INTERVAL;
9380 /* Step 2: compute initial and final value of the loop counter. */
9382 /* TEST_OFFSET = FIRST. */
9383 emit_move_insn (sr.reg, GEN_INT (-first));
9385 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9386 last = first + rounded_size;
9391 while (TEST_ADDR != LAST_ADDR)
9393 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9397 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9398 until it is equal to ROUNDED_SIZE. */
9400 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9403 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9404 that SIZE is equal to ROUNDED_SIZE. */
9406 if (size != rounded_size)
9407 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9410 rounded_size - size));
9412 release_scratch_register_on_entry (&sr);
9415 /* Make sure nothing is scheduled before we are done. */
9416 emit_insn (gen_blockage ());
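/* Illustration (not part of GCC): step 1 above rounds SIZE down to a
   multiple of the probe interval with size & -PROBE_INTERVAL; when SIZE
   is not an exact multiple, one residual probe at FIRST + SIZE finishes
   the range.  The interval value here is hypothetical.  */
#include <assert.h>

static void
example_rounded_size_selftest (void)
{
  const long interval = 4096;
  long size = 10000;
  long rounded = size & -interval;

  assert (rounded == 8192);	/* two full intervals */
  assert (size != rounded);	/* so one extra probe covers the tail */
}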
9419 /* Probe a range of stack addresses from REG to END, inclusive. These are
9420 offsets from the current stack pointer. */
9423 output_probe_stack_range (rtx reg, rtx end)
9425 static int labelno = 0;
9426 char loop_lab[32], end_lab[32];
9429 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9430 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9432 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9434 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9437 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9438 fputs ("\tje\t", asm_out_file);
9439 assemble_name_raw (asm_out_file, end_lab);
9440 fputc ('\n', asm_out_file);
9442 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9443 xops[1] = GEN_INT (PROBE_INTERVAL);
9444 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9446 /* Probe at TEST_ADDR. */
9447 xops[0] = stack_pointer_rtx;
9449 xops[2] = const0_rtx;
9450 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9452 fprintf (asm_out_file, "\tjmp\t");
9453 assemble_name_raw (asm_out_file, loop_lab);
9454 fputc ('\n', asm_out_file);
9456 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9461 /* Finalize the stack_realign_needed flag, which will guide the
9462 prologue/epilogue to be generated in the correct form. */
9464 ix86_finalize_stack_realign_flags (void)
9466 /* Check if stack realignment is really needed after reload, and
9467 store the result in cfun. */
9468 unsigned int incoming_stack_boundary
9469 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9470 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9471 unsigned int stack_realign = (incoming_stack_boundary
9472 < (current_function_is_leaf
9473 ? crtl->max_used_stack_slot_alignment
9474 : crtl->stack_alignment_needed));
9476 if (crtl->stack_realign_finalized)
9478 /* After stack_realign_needed is finalized, we can no longer
9479 change it. */
9480 gcc_assert (crtl->stack_realign_needed == stack_realign);
9484 crtl->stack_realign_needed = stack_realign;
9485 crtl->stack_realign_finalized = true;
9489 /* Expand the prologue into a bunch of separate insns. */
9492 ix86_expand_prologue (void)
9494 struct machine_function *m = cfun->machine;
9497 struct ix86_frame frame;
9498 HOST_WIDE_INT allocate;
9499 bool int_registers_saved;
9501 ix86_finalize_stack_realign_flags ();
9503 /* DRAP should not coexist with stack_realign_fp */
9504 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9506 memset (&m->fs, 0, sizeof (m->fs));
9508 /* Initialize CFA state for before the prologue. */
9509 m->fs.cfa_reg = stack_pointer_rtx;
9510 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9512 /* Track SP offset to the CFA. We continue tracking this after we've
9513 swapped the CFA register away from SP. In the case of re-alignment
9514 this is fudged; we're interested in offsets within the local frame. */
9515 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9516 m->fs.sp_valid = true;
9518 ix86_compute_frame_layout (&frame);
9520 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9522 /* We should have already generated an error for any use of
9523 ms_hook on a nested function. */
9524 gcc_checking_assert (!ix86_static_chain_on_stack);
9526 /* Check if profiling is active and we shall use the profiling-before-
9527 prologue variant. If so, issue a sorry. */
9528 if (crtl->profile && flag_fentry != 0)
9529 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9531 /* In ix86_asm_output_function_label we emitted:
9532 8b ff movl.s %edi,%edi
9534 8b ec movl.s %esp,%ebp
9536 This matches the hookable function prologue in Win32 API
9537 functions in Microsoft Windows XP Service Pack 2 and newer.
9538 Wine uses this to enable Windows apps to hook the Win32 API
9539 functions provided by Wine.
9541 What that means is that we've already set up the frame pointer. */
9543 if (frame_pointer_needed
9544 && !(crtl->drap_reg && crtl->stack_realign_needed))
9548 /* We've decided to use the frame pointer already set up.
9549 Describe this to the unwinder by pretending that both
9550 push and mov insns happen right here.
9552 Putting the unwind info here at the end of the ms_hook
9553 is done so that we can make absolutely certain we get
9554 the required byte sequence at the start of the function,
9555 rather than relying on an assembler that can produce
9556 the exact encoding required.
9558 However it does mean (in the unpatched case) that we have
9559 a 1 insn window where the asynchronous unwind info is
9560 incorrect. However, if we placed the unwind info at
9561 its correct location we would have incorrect unwind info
9562 in the patched case. Which is probably all moot since
9563 I don't expect Wine generates dwarf2 unwind info for the
9564 system libraries that use this feature. */
9566 insn = emit_insn (gen_blockage ());
9568 push = gen_push (hard_frame_pointer_rtx);
9569 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9571 RTX_FRAME_RELATED_P (push) = 1;
9572 RTX_FRAME_RELATED_P (mov) = 1;
9574 RTX_FRAME_RELATED_P (insn) = 1;
9575 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9576 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9578 /* Note that gen_push incremented m->fs.cfa_offset, even
9579 though we didn't emit the push insn here. */
9580 m->fs.cfa_reg = hard_frame_pointer_rtx;
9581 m->fs.fp_offset = m->fs.cfa_offset;
9582 m->fs.fp_valid = true;
9586 /* The frame pointer is not needed so pop %ebp again.
9587 This leaves us with a pristine state. */
9588 emit_insn (gen_pop (hard_frame_pointer_rtx));
9592 /* The first insn of a function that accepts its static chain on the
9593 stack is to push the register that would be filled in by a direct
9594 call. This insn will be skipped by the trampoline. */
9595 else if (ix86_static_chain_on_stack)
9597 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9598 emit_insn (gen_blockage ());
9600 /* We don't want to interpret this push insn as a register save,
9601 only as a stack adjustment. The real copy of the register as
9602 a save will be done later, if needed. */
9603 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9604 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9605 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9606 RTX_FRAME_RELATED_P (insn) = 1;
9609 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9610 DRAP is needed and stack realignment is really needed after reload. */
9611 if (stack_realign_drap)
9613 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9615 /* Only need to push parameter pointer reg if it is caller saved. */
9616 if (!call_used_regs[REGNO (crtl->drap_reg)])
9618 /* Push arg pointer reg */
9619 insn = emit_insn (gen_push (crtl->drap_reg));
9620 RTX_FRAME_RELATED_P (insn) = 1;
9623 /* Grab the argument pointer. */
9624 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9625 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9626 RTX_FRAME_RELATED_P (insn) = 1;
9627 m->fs.cfa_reg = crtl->drap_reg;
9628 m->fs.cfa_offset = 0;
9630 /* Align the stack. */
9631 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9633 GEN_INT (-align_bytes)));
9634 RTX_FRAME_RELATED_P (insn) = 1;
9636 /* Replicate the return address on the stack so that the return
9637 address can be reached via the (argp - 1) slot. This is needed
9638 to implement macro RETURN_ADDR_RTX and intrinsic function
9639 expand_builtin_return_addr etc. */
9640 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9641 t = gen_frame_mem (Pmode, t);
9642 insn = emit_insn (gen_push (t));
9643 RTX_FRAME_RELATED_P (insn) = 1;
9645 /* For the purposes of frame and register save area addressing,
9646 we've started over with a new frame. */
9647 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9648 m->fs.realigned = true;
9651 if (frame_pointer_needed && !m->fs.fp_valid)
9653 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9654 slower on all targets. Also sdb doesn't like it. */
9655 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9656 RTX_FRAME_RELATED_P (insn) = 1;
9658 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9659 RTX_FRAME_RELATED_P (insn) = 1;
9661 if (m->fs.cfa_reg == stack_pointer_rtx)
9662 m->fs.cfa_reg = hard_frame_pointer_rtx;
9663 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9664 m->fs.fp_offset = m->fs.sp_offset;
9665 m->fs.fp_valid = true;
9668 int_registers_saved = (frame.nregs == 0);
9670 if (!int_registers_saved)
9672 /* If saving registers via PUSH, do so now. */
9673 if (!frame.save_regs_using_mov)
9675 ix86_emit_save_regs ();
9676 int_registers_saved = true;
9677 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9680 /* When using the red zone we may start register saving before allocating
9681 the stack frame, saving one cycle of the prologue. However, avoid
9682 doing this if we have to probe the stack; at least on x86_64 the
9683 stack probe can turn into a call that clobbers a red zone location. */
9684 else if (ix86_using_red_zone ()
9685 && (! TARGET_STACK_PROBE
9686 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9688 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9689 int_registers_saved = true;
9693 if (stack_realign_fp)
9695 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9696 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9698 /* The computation of the size of the re-aligned stack frame means
9699 that we must allocate the size of the register save area before
9700 performing the actual alignment. Otherwise we cannot guarantee
9701 that there's enough storage above the realignment point. */
9702 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9703 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9704 GEN_INT (m->fs.sp_offset
9705 - frame.sse_reg_save_offset),
9708 /* Align the stack. */
9709 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9711 GEN_INT (-align_bytes)));
9713 /* For the purposes of register save area addressing, the stack
9714 pointer is no longer valid. As for the value of sp_offset,
9715 see ix86_compute_frame_layout, which we need to match in order
9716 to pass verification of stack_pointer_offset at the end. */
9717 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9718 m->fs.sp_valid = false;
9721 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9723 if (flag_stack_usage)
9725 /* We start to count from ARG_POINTER. */
9726 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9728 /* If it was realigned, take into account the fake frame. */
9729 if (stack_realign_drap)
9731 if (ix86_static_chain_on_stack)
9732 stack_size += UNITS_PER_WORD;
9734 if (!call_used_regs[REGNO (crtl->drap_reg)])
9735 stack_size += UNITS_PER_WORD;
9737 /* This over-estimates by 1 minimal-stack-alignment-unit but
9738 mitigates that by counting in the new return address slot. */
9739 current_function_dynamic_stack_size
9740 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9743 current_function_static_stack_size = stack_size;
9746 /* The stack has already been decremented by the instruction calling us
9747 so we need to probe unconditionally to preserve the protection area. */
9748 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9750 /* We expect the registers to be saved when probes are used. */
9751 gcc_assert (int_registers_saved);
9753 if (STACK_CHECK_MOVING_SP)
9755 ix86_adjust_stack_and_probe (allocate);
9760 HOST_WIDE_INT size = allocate;
9762 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9763 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9765 if (TARGET_STACK_PROBE)
9766 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9768 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9774 else if (!ix86_target_stack_probe ()
9775 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9777 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9778 GEN_INT (-allocate), -1,
9779 m->fs.cfa_reg == stack_pointer_rtx);
9783 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9785 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
9787 bool eax_live = false;
9788 bool r10_live = false;
9791 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9792 if (!TARGET_64BIT_MS_ABI)
9793 eax_live = ix86_eax_live_at_start_p ();
9797 emit_insn (gen_push (eax));
9798 allocate -= UNITS_PER_WORD;
9802 r10 = gen_rtx_REG (Pmode, R10_REG);
9803 emit_insn (gen_push (r10));
9804 allocate -= UNITS_PER_WORD;
9807 emit_move_insn (eax, GEN_INT (allocate));
9808 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9810 /* Use the fact that AX still contains ALLOCATE. */
9811 adjust_stack_insn = (TARGET_64BIT
9812 ? gen_pro_epilogue_adjust_stack_di_sub
9813 : gen_pro_epilogue_adjust_stack_si_sub);
9815 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
9816 stack_pointer_rtx, eax));
9818 if (m->fs.cfa_reg == stack_pointer_rtx)
9820 m->fs.cfa_offset += allocate;
9822 RTX_FRAME_RELATED_P (insn) = 1;
9823 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9824 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9825 plus_constant (stack_pointer_rtx,
9828 m->fs.sp_offset += allocate;
9830 if (r10_live && eax_live)
9832 t = choose_baseaddr (m->fs.sp_offset - allocate);
9833 emit_move_insn (r10, gen_frame_mem (Pmode, t));
9834 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
9835 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9837 else if (eax_live || r10_live)
9839 t = choose_baseaddr (m->fs.sp_offset - allocate);
9840 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
9843 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9845 if (!int_registers_saved)
9846 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9848 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9850 pic_reg_used = false;
9851 if (pic_offset_table_rtx
9852 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9855 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9857 if (alt_pic_reg_used != INVALID_REGNUM)
9858 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9860 pic_reg_used = true;
9867 if (ix86_cmodel == CM_LARGE_PIC)
9869 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9870 rtx label = gen_label_rtx ();
9872 LABEL_PRESERVE_P (label) = 1;
9873 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9874 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9875 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9876 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9877 pic_offset_table_rtx, tmp_reg));
9880 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9883 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9886 /* In the pic_reg_used case, make sure that the got load isn't deleted
9887 when mcount needs it. Blockage to avoid call movement across mcount
9888 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9889 note. */
9890 if (crtl->profile && !flag_fentry && pic_reg_used)
9891 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9893 if (crtl->drap_reg && !crtl->stack_realign_needed)
9895 /* vDRAP is set up, but after reload it turns out stack realignment
9896 isn't necessary; here we emit prologue code to set up DRAP
9897 without the stack realignment adjustment. */
9898 t = choose_baseaddr (0);
9899 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9902 /* Prevent instructions from being scheduled into register save push
9903 sequence when access to the redzone area is done through frame pointer.
9904 The offset between the frame pointer and the stack pointer is calculated
9905 relative to the value of the stack pointer at the end of the function
9906 prologue, and moving instructions that access redzone area via frame
9907 pointer inside push sequence violates this assumption. */
9908 if (frame_pointer_needed && frame.red_zone_size)
9909 emit_insn (gen_memory_blockage ());
9911 /* Emit cld instruction if stringops are used in the function. */
9912 if (TARGET_CLD && ix86_current_function_needs_cld)
9913 emit_insn (gen_cld ());
9916 /* Emit code to restore REG using a POP insn. */
9919 ix86_emit_restore_reg_using_pop (rtx reg)
9921 struct machine_function *m = cfun->machine;
9922 rtx insn = emit_insn (gen_pop (reg));
9924 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9925 m->fs.sp_offset -= UNITS_PER_WORD;
9927 if (m->fs.cfa_reg == crtl->drap_reg
9928 && REGNO (reg) == REGNO (crtl->drap_reg))
9930 /* Previously we'd represented the CFA as an expression
9931 like *(%ebp - 8). We've just popped that value from
9932 the stack, which means we need to reset the CFA to
9933 the drap register. This will remain until we restore
9934 the stack pointer. */
9935 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9936 RTX_FRAME_RELATED_P (insn) = 1;
9938 /* This means that the DRAP register is valid for addressing too. */
9939 m->fs.drap_valid = true;
9943 if (m->fs.cfa_reg == stack_pointer_rtx)
9945 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9946 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9947 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9948 RTX_FRAME_RELATED_P (insn) = 1;
9950 m->fs.cfa_offset -= UNITS_PER_WORD;
9953 /* When the frame pointer is the CFA, and we pop it, we are
9954 swapping back to the stack pointer as the CFA. This happens
9955 for stack frames that don't allocate other data, so we assume
9956 the stack pointer is now pointing at the return address, i.e.
9957 the function entry state, which makes the offset be 1 word. */
9958 if (reg == hard_frame_pointer_rtx)
9960 m->fs.fp_valid = false;
9961 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9963 m->fs.cfa_reg = stack_pointer_rtx;
9964 m->fs.cfa_offset -= UNITS_PER_WORD;
9966 add_reg_note (insn, REG_CFA_DEF_CFA,
9967 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9968 GEN_INT (m->fs.cfa_offset)));
9969 RTX_FRAME_RELATED_P (insn) = 1;
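/* Illustration (not part of GCC): the inverse of the push bookkeeping
   sketched earlier.  Popping one word moves the stack pointer up, so the
   tracked SP offset shrinks by a word, and while the CFA register is the
   stack pointer its offset shrinks too.  Values are hypothetical.  */
#include <assert.h>

static void
example_pop_selftest (void)
{
  const long word = 4;		/* hypothetical 32-bit UNITS_PER_WORD */
  long sp_offset = 8;		/* one saved register on the stack */
  long cfa_offset = 8;
  int cfa_is_sp = 1;

  sp_offset -= word;		/* pop %reg */
  if (cfa_is_sp)
    cfa_offset -= word;
  assert (sp_offset == 4 && cfa_offset == 4);
}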
9974 /* Emit code to restore saved registers using POP insns. */
9977 ix86_emit_restore_regs_using_pop (void)
9981 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9982 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9983 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9986 /* Emit code and notes for the LEAVE instruction. */
9989 ix86_emit_leave (void)
9991 struct machine_function *m = cfun->machine;
9992 rtx insn = emit_insn (ix86_gen_leave ());
9994 ix86_add_queued_cfa_restore_notes (insn);
9996 gcc_assert (m->fs.fp_valid);
9997 m->fs.sp_valid = true;
9998 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9999 m->fs.fp_valid = false;
10001 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10003 m->fs.cfa_reg = stack_pointer_rtx;
10004 m->fs.cfa_offset = m->fs.sp_offset;
10006 add_reg_note (insn, REG_CFA_DEF_CFA,
10007 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10008 RTX_FRAME_RELATED_P (insn) = 1;
10009 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10014 /* Emit code to restore saved registers using MOV insns.
10015 First register is restored from CFA - CFA_OFFSET. */
10017 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10018 int maybe_eh_return)
10020 struct machine_function *m = cfun->machine;
10021 unsigned int regno;
10023 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10024 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10026 rtx reg = gen_rtx_REG (Pmode, regno);
10029 mem = choose_baseaddr (cfa_offset);
10030 mem = gen_frame_mem (Pmode, mem);
10031 insn = emit_move_insn (reg, mem);
10033 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10035 /* Previously we'd represented the CFA as an expression
10036 like *(%ebp - 8). We've just popped that value from
10037 the stack, which means we need to reset the CFA to
10038 the drap register. This will remain until we restore
10039 the stack pointer. */
10040 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10041 RTX_FRAME_RELATED_P (insn) = 1;
10043 /* This means that the DRAP register is valid for addressing. */
10044 m->fs.drap_valid = true;
10047 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10049 cfa_offset -= UNITS_PER_WORD;
10053 /* Emit code to restore saved SSE registers using MOV insns.
10054 First register is restored from CFA - CFA_OFFSET. */
10056 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10057 int maybe_eh_return)
10059 unsigned int regno;
10061 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10062 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10064 rtx reg = gen_rtx_REG (V4SFmode, regno);
10067 mem = choose_baseaddr (cfa_offset);
10068 mem = gen_rtx_MEM (V4SFmode, mem);
10069 set_mem_align (mem, 128);
10070 emit_move_insn (reg, mem);
10072 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10078 /* Restore function stack, frame, and registers. */
10081 ix86_expand_epilogue (int style)
10083 struct machine_function *m = cfun->machine;
10084 struct machine_frame_state frame_state_save = m->fs;
10085 struct ix86_frame frame;
10086 bool restore_regs_via_mov;
10089 ix86_finalize_stack_realign_flags ();
10090 ix86_compute_frame_layout (&frame);
10092 m->fs.sp_valid = (!frame_pointer_needed
10093 || (current_function_sp_is_unchanging
10094 && !stack_realign_fp));
10095 gcc_assert (!m->fs.sp_valid
10096 || m->fs.sp_offset == frame.stack_pointer_offset);
  /* The FP state must be valid exactly when the frame pointer is needed.  */
10099 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10100 gcc_assert (!m->fs.fp_valid
10101 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10103 /* We must have *some* valid pointer to the stack frame. */
10104 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10106 /* The DRAP is never valid at this point. */
10107 gcc_assert (!m->fs.drap_valid);
10109 /* See the comment about red zone and frame
10110 pointer usage in ix86_expand_prologue. */
10111 if (frame_pointer_needed && frame.red_zone_size)
10112 emit_insn (gen_memory_blockage ());
10114 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10115 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10117 /* Determine the CFA offset of the end of the red-zone. */
10118 m->fs.red_zone_offset = 0;
10119 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10121 /* The red-zone begins below the return address. */
10122 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
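      /* A worked example (assuming the x86-64 ABI, where RED_ZONE_SIZE
	 is 128 and UNITS_PER_WORD is 8): the red zone spans the 128
	 bytes below the return address, i.e. [CFA-136, CFA-8), so
	 red_zone_offset becomes 136.  */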
10124 /* When the register save area is in the aligned portion of
10125 the stack, determine the maximum runtime displacement that
10126 matches up with the aligned frame. */
10127 if (stack_realign_drap)
10128 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10132 /* Special care must be taken for the normal return case of a function
10133 using eh_return: the eax and edx registers are marked as saved, but
10134 not restored along this path. Adjust the save location to match. */
10135 if (crtl->calls_eh_return && style != 2)
10136 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
10141 if (!m->fs.sp_valid && frame.nregs <= 1)
10142 restore_regs_via_mov = true;
10143 /* EH_RETURN requires the use of moves to function properly. */
10144 else if (crtl->calls_eh_return)
10145 restore_regs_via_mov = true;
10146 else if (TARGET_EPILOGUE_USING_MOVE
10147 && cfun->machine->use_fast_prologue_epilogue
10148 && (frame.nregs > 1
10149 || m->fs.sp_offset != frame.reg_save_offset))
10150 restore_regs_via_mov = true;
10151 else if (frame_pointer_needed
10153 && m->fs.sp_offset != frame.reg_save_offset)
10154 restore_regs_via_mov = true;
10155 else if (frame_pointer_needed
10156 && TARGET_USE_LEAVE
10157 && cfun->machine->use_fast_prologue_epilogue
10158 && frame.nregs == 1)
10159 restore_regs_via_mov = true;
10161 restore_regs_via_mov = false;
10163 if (restore_regs_via_mov || frame.nsseregs)
10165 /* Ensure that the entire register save area is addressable via
10166 the stack pointer, if we will restore via sp. */
10168 && m->fs.sp_offset > 0x7fffffff
10169 && !(m->fs.fp_valid || m->fs.drap_valid)
10170 && (frame.nsseregs + frame.nregs) != 0)
10172 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10173 GEN_INT (m->fs.sp_offset
10174 - frame.sse_reg_save_offset),
10176 m->fs.cfa_reg == stack_pointer_rtx);
10180 /* If there are any SSE registers to restore, then we have to do it
10181 via moves, since there's obviously no pop for SSE regs. */
10182 if (frame.nsseregs)
10183 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10186 if (restore_regs_via_mov)
10191 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10193 /* eh_return epilogues need %ecx added to the stack pointer. */
10196 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10198 /* Stack align doesn't work with eh_return. */
10199 gcc_assert (!stack_realign_drap);
      /* Neither do regparm nested functions.  */
10201 gcc_assert (!ix86_static_chain_on_stack);
10203 if (frame_pointer_needed)
10205 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10206 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10207 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10209 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10210 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10212 /* Note that we use SA as a temporary CFA, as the return
10213 address is at the proper place relative to it. We
10214 pretend this happens at the FP restore insn because
10215 prior to this insn the FP would be stored at the wrong
10216 offset relative to SA, and after this insn we have no
10217 other reasonable register to use for the CFA. We don't
10218 bother resetting the CFA to the SP for the duration of
10219 the return insn. */
10220 add_reg_note (insn, REG_CFA_DEF_CFA,
10221 plus_constant (sa, UNITS_PER_WORD));
10222 ix86_add_queued_cfa_restore_notes (insn);
10223 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10224 RTX_FRAME_RELATED_P (insn) = 1;
10226 m->fs.cfa_reg = sa;
10227 m->fs.cfa_offset = UNITS_PER_WORD;
10228 m->fs.fp_valid = false;
10230 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10231 const0_rtx, style, false);
10235 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10236 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10237 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10238 ix86_add_queued_cfa_restore_notes (insn);
10240 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10241 if (m->fs.cfa_offset != UNITS_PER_WORD)
10243 m->fs.cfa_offset = UNITS_PER_WORD;
10244 add_reg_note (insn, REG_CFA_DEF_CFA,
10245 plus_constant (stack_pointer_rtx,
10247 RTX_FRAME_RELATED_P (insn) = 1;
10250 m->fs.sp_offset = UNITS_PER_WORD;
10251 m->fs.sp_valid = true;
10256 /* First step is to deallocate the stack frame so that we can
10257 pop the registers. */
10258 if (!m->fs.sp_valid)
10260 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10261 GEN_INT (m->fs.fp_offset
10262 - frame.reg_save_offset),
10265 else if (m->fs.sp_offset != frame.reg_save_offset)
10267 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10268 GEN_INT (m->fs.sp_offset
10269 - frame.reg_save_offset),
10271 m->fs.cfa_reg == stack_pointer_rtx);
10274 ix86_emit_restore_regs_using_pop ();
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
10279 if (m->fs.fp_valid)
10281 /* If the stack pointer is valid and pointing at the frame
10282 pointer store address, then we only need a pop. */
10283 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10284 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10285 /* Leave results in shorter dependency chains on CPUs that are
10286 able to grok it fast. */
10287 else if (TARGET_USE_LEAVE
10288 || optimize_function_for_size_p (cfun)
10289 || !cfun->machine->use_fast_prologue_epilogue)
10290 ix86_emit_leave ();
10293 pro_epilogue_adjust_stack (stack_pointer_rtx,
10294 hard_frame_pointer_rtx,
10295 const0_rtx, style, !using_drap);
10296 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10302 int param_ptr_offset = UNITS_PER_WORD;
10305 gcc_assert (stack_realign_drap);
10307 if (ix86_static_chain_on_stack)
10308 param_ptr_offset += UNITS_PER_WORD;
10309 if (!call_used_regs[REGNO (crtl->drap_reg)])
10310 param_ptr_offset += UNITS_PER_WORD;
10312 insn = emit_insn (gen_rtx_SET
10313 (VOIDmode, stack_pointer_rtx,
10314 gen_rtx_PLUS (Pmode,
10316 GEN_INT (-param_ptr_offset))));
10317 m->fs.cfa_reg = stack_pointer_rtx;
10318 m->fs.cfa_offset = param_ptr_offset;
10319 m->fs.sp_offset = param_ptr_offset;
10320 m->fs.realigned = false;
10322 add_reg_note (insn, REG_CFA_DEF_CFA,
10323 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10324 GEN_INT (param_ptr_offset)));
10325 RTX_FRAME_RELATED_P (insn) = 1;
10327 if (!call_used_regs[REGNO (crtl->drap_reg)])
10328 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10331 /* At this point the stack pointer must be valid, and we must have
10332 restored all of the registers. We may not have deallocated the
10333 entire stack frame. We've delayed this until now because it may
10334 be possible to merge the local stack deallocation with the
10335 deallocation forced by ix86_static_chain_on_stack. */
10336 gcc_assert (m->fs.sp_valid);
10337 gcc_assert (!m->fs.fp_valid);
10338 gcc_assert (!m->fs.realigned);
10339 if (m->fs.sp_offset != UNITS_PER_WORD)
10341 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10342 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10346 /* Sibcall epilogues don't want a return instruction. */
10349 m->fs = frame_state_save;
10353 if (crtl->args.pops_args && crtl->args.size)
10355 rtx popc = GEN_INT (crtl->args.pops_args);
10357 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10358 address, do explicit add, and jump indirectly to the caller. */
10360 if (crtl->args.pops_args >= 65536)
10362 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10365 /* There is no "pascal" calling convention in any 64bit ABI. */
10366 gcc_assert (!TARGET_64BIT);
10368 insn = emit_insn (gen_pop (ecx));
10369 m->fs.cfa_offset -= UNITS_PER_WORD;
10370 m->fs.sp_offset -= UNITS_PER_WORD;
10372 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10373 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10374 add_reg_note (insn, REG_CFA_REGISTER,
10375 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10376 RTX_FRAME_RELATED_P (insn) = 1;
10378 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10380 emit_jump_insn (gen_return_indirect_internal (ecx));
10383 emit_jump_insn (gen_return_pop_internal (popc));
10386 emit_jump_insn (gen_return_internal ());
10388 /* Restore the state back to the state from the prologue,
10389 so that it's correct for the next epilogue. */
10390 m->fs = frame_state_save;
10393 /* Reset from the function's potential modifications. */
10396 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10397 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10399 if (pic_offset_table_rtx)
10400 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10402 /* Mach-O doesn't support labels at the end of objects, so if
10403 it looks like we might want one, insert a NOP. */
10405 rtx insn = get_last_insn ();
10408 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10409 insn = PREV_INSN (insn);
10413 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10414 fputs ("\tnop\n", file);
10420 /* Return a scratch register to use in the split stack prologue. The
   split stack prologue is used for -fsplit-stack.  It comprises the
   first instructions in the function, emitted even before the regular
   prologue.
10423 The scratch register can be any caller-saved register which is not
10424 used for parameters or for the static chain. */
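/* A descriptive note based on the checks below: in 64-bit code %r11 is
   typically chosen, while in 32-bit code the candidates are the
   caller-saved %eax, %ecx and %edx, skipped in turn as fastcall or
   regparm assign them to parameters.  */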
10426 static unsigned int
10427 split_stack_prologue_scratch_regno (void)
10436 is_fastcall = (lookup_attribute ("fastcall",
10437 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10439 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10443 if (DECL_STATIC_CHAIN (cfun->decl))
	  sorry ("-fsplit-stack does not support fastcall with "
		 "a nested function");
10447 return INVALID_REGNUM;
10451 else if (regparm < 3)
10453 if (!DECL_STATIC_CHAIN (cfun->decl))
	      sorry ("-fsplit-stack does not support 2 register "
		     "parameters for a nested function");
10461 return INVALID_REGNUM;
10468 /* FIXME: We could make this work by pushing a register
10469 around the addition and comparison. */
10470 sorry ("-fsplit-stack does not support 3 register parameters");
10471 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
   split stack.  */
10479 static GTY(()) rtx split_stack_fn;
10481 /* Handle -fsplit-stack. These are the first instructions in the
10482 function, even before the regular prologue. */
10485 ix86_expand_split_stack_prologue (void)
10487 struct ix86_frame frame;
10488 HOST_WIDE_INT allocate;
10490 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10491 rtx scratch_reg = NULL_RTX;
10492 rtx varargs_label = NULL_RTX;
10494 gcc_assert (flag_split_stack && reload_completed);
10496 ix86_finalize_stack_realign_flags ();
10497 ix86_compute_frame_layout (&frame);
10498 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10500 /* This is the label we will branch to if we have enough stack
10501 space. We expect the basic block reordering pass to reverse this
10502 branch if optimizing, so that we branch in the unlikely case. */
10503 label = gen_label_rtx ();
10505 /* We need to compare the stack pointer minus the frame size with
10506 the stack boundary in the TCB. The stack boundary always gives
10507 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10508 can compare directly. Otherwise we need to do an addition. */
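  /* As a sketch of the slow-path comparison (illustrative only; FRAME
     and <guard> are placeholders, and the actual TCB slot and scratch
     register are target details):

	lea	-FRAME(%rsp), %r11	# sp minus frame size
	cmp	%fs:<guard>, %r11	# limit from the TCB (UNSPEC_STACK_CHECK)
	jae	.Lenough		# enough stack, skip __morestack  */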
10510 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10511 UNSPEC_STACK_CHECK);
10512 limit = gen_rtx_CONST (Pmode, limit);
10513 limit = gen_rtx_MEM (Pmode, limit);
10514 if (allocate < SPLIT_STACK_AVAILABLE)
10515 current = stack_pointer_rtx;
10518 unsigned int scratch_regno;
10521 /* We need a scratch register to hold the stack pointer minus
10522 the required frame size. Since this is the very start of the
10523 function, the scratch register can be any caller-saved
10524 register which is not used for parameters. */
10525 offset = GEN_INT (- allocate);
10526 scratch_regno = split_stack_prologue_scratch_regno ();
10527 if (scratch_regno == INVALID_REGNUM)
10529 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10530 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10532 /* We don't use ix86_gen_add3 in this case because it will
10533 want to split to lea, but when not optimizing the insn
10534 will not be split after this point. */
10535 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10536 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10541 emit_move_insn (scratch_reg, offset);
10542 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10543 stack_pointer_rtx));
10545 current = scratch_reg;
10548 ix86_expand_branch (GEU, current, limit, label);
10549 jump_insn = get_last_insn ();
10550 JUMP_LABEL (jump_insn) = label;
10552 /* Mark the jump as very likely to be taken. */
10553 add_reg_note (jump_insn, REG_BR_PROB,
10554 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10556 /* Get more stack space. We pass in the desired stack space and the
10557 size of the arguments to copy to the new stack. In 32-bit mode
10558 we push the parameters; __morestack will return on a new stack
   anyhow.  In 64-bit mode we pass the parameters in r10 and r11.  */
10561 allocate_rtx = GEN_INT (allocate);
10562 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10563 call_fusage = NULL_RTX;
10568 reg = gen_rtx_REG (Pmode, R10_REG);
10570 /* If this function uses a static chain, it will be in %r10.
10571 Preserve it across the call to __morestack. */
10572 if (DECL_STATIC_CHAIN (cfun->decl))
10576 rax = gen_rtx_REG (Pmode, AX_REG);
10577 emit_move_insn (rax, reg);
10578 use_reg (&call_fusage, rax);
10581 emit_move_insn (reg, allocate_rtx);
10582 use_reg (&call_fusage, reg);
10583 reg = gen_rtx_REG (Pmode, R11_REG);
10584 emit_move_insn (reg, GEN_INT (args_size));
10585 use_reg (&call_fusage, reg);
10589 emit_insn (gen_push (GEN_INT (args_size)));
10590 emit_insn (gen_push (allocate_rtx));
10592 if (split_stack_fn == NULL_RTX)
10593 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10594 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10595 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10597 add_function_usage_to (call_insn, call_fusage);
10599 /* In order to make call/return prediction work right, we now need
10600 to execute a return instruction. See
10601 libgcc/config/i386/morestack.S for the details on how this works.
10603 For flow purposes gcc must not see this as a return
10604 instruction--we need control flow to continue at the subsequent
10605 label. Therefore, we use an unspec. */
10606 gcc_assert (crtl->args.pops_args < 65536);
10607 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10609 /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
10611 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10612 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10613 gen_rtx_REG (Pmode, AX_REG));
10615 /* If this function calls va_start, we need to store a pointer to
10616 the arguments on the old stack, because they may not have been
10617 all copied to the new stack. At this point the old stack can be
10618 found at the frame pointer value used by __morestack, because
10619 __morestack has set that up before calling back to us. Here we
10620 store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
10623 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10625 unsigned int scratch_regno;
10629 scratch_regno = split_stack_prologue_scratch_regno ();
10630 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10631 frame_reg = gen_rtx_REG (Pmode, BP_REG);
      /* In 64-bit mode the stack at this point looks like:

	     return address within this function
	     return address of caller of this function
	     stack arguments

	 So we add three words to get to the stack arguments.

	 In 32-bit mode the stack looks like:

	     return address within this function
	     first argument to __morestack
	     second argument to __morestack
	     return address of caller of this function
	     stack arguments

	 So we add five words to get to the stack arguments.  */
10649 words = TARGET_64BIT ? 3 : 5;
10650 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10651 gen_rtx_PLUS (Pmode, frame_reg,
10652 GEN_INT (words * UNITS_PER_WORD))));
10654 varargs_label = gen_label_rtx ();
10655 emit_jump_insn (gen_jump (varargs_label));
10656 JUMP_LABEL (get_last_insn ()) = varargs_label;
10661 emit_label (label);
10662 LABEL_NUSES (label) = 1;
10664 /* If this function calls va_start, we now have to set the scratch
10665 register for the case where we do not call __morestack. In this
10666 case we need to set it based on the stack pointer. */
10667 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10669 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10670 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10671 GEN_INT (UNITS_PER_WORD))));
10673 emit_label (varargs_label);
10674 LABEL_NUSES (varargs_label) = 1;
10678 /* We may have to tell the dataflow pass that the split stack prologue
10679 is initializing a scratch register. */
10682 ix86_live_on_entry (bitmap regs)
10684 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10686 gcc_assert (flag_split_stack);
10687 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10691 /* Extract the parts of an RTL expression that is a valid memory address
10692 for an instruction. Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of an lea
   instruction.  */
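/* For example (illustrative): the Pmode address

     (plus (plus (mult (reg:SI ax) (const_int 4)) (reg:SI bx))
	   (const_int 12))

   i.e. the AT&T operand 12(%ebx,%eax,4), decomposes into
   base = %ebx, index = %eax, scale = 4 and disp = 12.  */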
10697 ix86_decompose_address (rtx addr, struct ix86_address *out)
10699 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10700 rtx base_reg, index_reg;
10701 HOST_WIDE_INT scale = 1;
10702 rtx scale_rtx = NULL_RTX;
10705 enum ix86_address_seg seg = SEG_DEFAULT;
10707 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10709 else if (GET_CODE (addr) == PLUS)
10711 rtx addends[4], op;
10719 addends[n++] = XEXP (op, 1);
10722 while (GET_CODE (op) == PLUS);
10727 for (i = n; i >= 0; --i)
10730 switch (GET_CODE (op))
10735 index = XEXP (op, 0);
10736 scale_rtx = XEXP (op, 1);
10742 index = XEXP (op, 0);
10743 tmp = XEXP (op, 1);
10744 if (!CONST_INT_P (tmp))
10746 scale = INTVAL (tmp);
10747 if ((unsigned HOST_WIDE_INT) scale > 3)
10749 scale = 1 << scale;
10753 if (XINT (op, 1) == UNSPEC_TP
10754 && TARGET_TLS_DIRECT_SEG_REFS
10755 && seg == SEG_DEFAULT)
10756 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10785 else if (GET_CODE (addr) == MULT)
10787 index = XEXP (addr, 0); /* index*scale */
10788 scale_rtx = XEXP (addr, 1);
10790 else if (GET_CODE (addr) == ASHIFT)
10792 /* We're called for lea too, which implements ashift on occasion. */
10793 index = XEXP (addr, 0);
10794 tmp = XEXP (addr, 1);
10795 if (!CONST_INT_P (tmp))
10797 scale = INTVAL (tmp);
10798 if ((unsigned HOST_WIDE_INT) scale > 3)
10800 scale = 1 << scale;
10804 disp = addr; /* displacement */
10806 /* Extract the integral value of scale. */
10809 if (!CONST_INT_P (scale_rtx))
10811 scale = INTVAL (scale_rtx);
10814 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10815 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10817 /* Avoid useless 0 displacement. */
10818 if (disp == const0_rtx && (base || index))
  /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
10822 if (base_reg && index_reg && scale == 1
10823 && (index_reg == arg_pointer_rtx
10824 || index_reg == frame_pointer_rtx
10825 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10828 tmp = base, base = index, index = tmp;
10829 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
10836 && (base_reg == hard_frame_pointer_rtx
10837 || base_reg == frame_pointer_rtx
10838 || base_reg == arg_pointer_rtx
10839 || (REG_P (base_reg)
10840 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10841 || REGNO (base_reg) == R13_REG))))
  /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
     Avoid this by transforming it to [%esi+0].
10846 Reload calls address legitimization without cfun defined, so we need
10847 to test cfun for being non-NULL. */
10848 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10849 && base_reg && !index_reg && !disp
10850 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10853 /* Special case: encode reg+reg instead of reg*2. */
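  /* E.g. prefer (%eax,%eax) over (,%eax,2): an index with no base
     forces a 32-bit zero displacement in the SIB encoding, so the
     reg+reg form is shorter.  (Illustrative encoding note.)  */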
10854 if (!base && index && scale == 2)
10855 base = index, base_reg = index_reg, scale = 1;
10857 /* Special case: scaling cannot be encoded without base or displacement. */
10858 if (!base && !disp && index && scale != 1)
10862 out->index = index;
10864 out->scale = scale;
10870 /* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
10876 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10878 struct ix86_address parts;
10880 int ok = ix86_decompose_address (x, &parts);
10884 if (parts.base && GET_CODE (parts.base) == SUBREG)
10885 parts.base = SUBREG_REG (parts.base);
10886 if (parts.index && GET_CODE (parts.index) == SUBREG)
10887 parts.index = SUBREG_REG (parts.index);
10889 /* Attempt to minimize number of registers in the address. */
10891 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10893 && (!REG_P (parts.index)
10894 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10898 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10900 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10901 && parts.base != parts.index)
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of such instructions
     and they degenerate to vector decoding.  Increase the cost of such
     addresses here.  The penalty is at least 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them entirely.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
10920 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10921 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10922 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
10933 darwin_local_data_pic (rtx disp)
10935 return (GET_CODE (disp) == UNSPEC
10936 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10939 /* Determine if a given RTX is a valid constant. We already know this
10940 satisfies CONSTANT_P. */
10943 legitimate_constant_p (rtx x)
10945 switch (GET_CODE (x))
10950 if (GET_CODE (x) == PLUS)
10952 if (!CONST_INT_P (XEXP (x, 1)))
10957 if (TARGET_MACHO && darwin_local_data_pic (x))
10960 /* Only some unspecs are valid as "constants". */
10961 if (GET_CODE (x) == UNSPEC)
10962 switch (XINT (x, 1))
10965 case UNSPEC_GOTOFF:
10966 case UNSPEC_PLTOFF:
10967 return TARGET_64BIT;
10969 case UNSPEC_NTPOFF:
10970 x = XVECEXP (x, 0, 0);
10971 return (GET_CODE (x) == SYMBOL_REF
10972 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10973 case UNSPEC_DTPOFF:
10974 x = XVECEXP (x, 0, 0);
10975 return (GET_CODE (x) == SYMBOL_REF
10976 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10981 /* We must have drilled down to a symbol. */
10982 if (GET_CODE (x) == LABEL_REF)
10984 if (GET_CODE (x) != SYMBOL_REF)
10989 /* TLS symbols are never valid. */
10990 if (SYMBOL_REF_TLS_MODEL (x))
10993 /* DLLIMPORT symbols are never valid. */
10994 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10995 && SYMBOL_REF_DLLIMPORT_P (x))
11000 if (GET_MODE (x) == TImode
11001 && x != CONST0_RTX (TImode)
11007 if (!standard_sse_constant_p (x))
11014 /* Otherwise we handle everything else in the move patterns. */
11018 /* Determine if it's legal to put X into the constant pool. This
11019 is not possible for the address of thread-local symbols, which
11020 is checked above. */
11023 ix86_cannot_force_const_mem (rtx x)
11025 /* We can always put integral constants and vectors in memory. */
11026 switch (GET_CODE (x))
11036 return !legitimate_constant_p (x);
11040 /* Nonzero if the constant value X is a legitimate general operand
11041 when generating PIC code. It is given that flag_pic is on and
11042 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11045 legitimate_pic_operand_p (rtx x)
11049 switch (GET_CODE (x))
11052 inner = XEXP (x, 0);
11053 if (GET_CODE (inner) == PLUS
11054 && CONST_INT_P (XEXP (inner, 1)))
11055 inner = XEXP (inner, 0);
11057 /* Only some unspecs are valid as "constants". */
11058 if (GET_CODE (inner) == UNSPEC)
11059 switch (XINT (inner, 1))
11062 case UNSPEC_GOTOFF:
11063 case UNSPEC_PLTOFF:
11064 return TARGET_64BIT;
11066 x = XVECEXP (inner, 0, 0);
11067 return (GET_CODE (x) == SYMBOL_REF
11068 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11069 case UNSPEC_MACHOPIC_OFFSET:
11070 return legitimate_pic_address_disp_p (x);
11078 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
11089 legitimate_pic_address_disp_p (rtx disp)
11093 /* In 64bit mode we can allow direct addresses of symbols and labels
11094 when they are not dynamic symbols. */
11097 rtx op0 = disp, op1;
11099 switch (GET_CODE (disp))
11105 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11107 op0 = XEXP (XEXP (disp, 0), 0);
11108 op1 = XEXP (XEXP (disp, 0), 1);
11109 if (!CONST_INT_P (op1)
11110 || INTVAL (op1) >= 16*1024*1024
11111 || INTVAL (op1) < -16*1024*1024)
11113 if (GET_CODE (op0) == LABEL_REF)
11115 if (GET_CODE (op0) != SYMBOL_REF)
11120 /* TLS references should always be enclosed in UNSPEC. */
11121 if (SYMBOL_REF_TLS_MODEL (op0))
11123 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11124 && ix86_cmodel != CM_LARGE_PIC)
11132 if (GET_CODE (disp) != CONST)
11134 disp = XEXP (disp, 0);
      /* It is unsafe to allow PLUS expressions here; that would permit
	 arbitrary displacements into GOT tables, which we should not
	 need anyway.  */
11140 if (GET_CODE (disp) != UNSPEC
11141 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11142 && XINT (disp, 1) != UNSPEC_GOTOFF
11143 && XINT (disp, 1) != UNSPEC_PLTOFF))
11146 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11147 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11153 if (GET_CODE (disp) == PLUS)
11155 if (!CONST_INT_P (XEXP (disp, 1)))
11157 disp = XEXP (disp, 0);
11161 if (TARGET_MACHO && darwin_local_data_pic (disp))
11164 if (GET_CODE (disp) != UNSPEC)
11167 switch (XINT (disp, 1))
11172 /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
11175 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11176 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11177 case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
11181 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11182 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11184 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11186 case UNSPEC_GOTTPOFF:
11187 case UNSPEC_GOTNTPOFF:
11188 case UNSPEC_INDNTPOFF:
11191 disp = XVECEXP (disp, 0, 0);
11192 return (GET_CODE (disp) == SYMBOL_REF
11193 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11194 case UNSPEC_NTPOFF:
11195 disp = XVECEXP (disp, 0, 0);
11196 return (GET_CODE (disp) == SYMBOL_REF
11197 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11198 case UNSPEC_DTPOFF:
11199 disp = XVECEXP (disp, 0, 0);
11200 return (GET_CODE (disp) == SYMBOL_REF
11201 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11207 /* Recognizes RTL expressions that are valid memory addresses for an
11208 instruction. The MODE argument is the machine mode for the MEM
11209 expression that wants to use this address.
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */
11216 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11217 rtx addr, bool strict)
11219 struct ix86_address parts;
11220 rtx base, index, disp;
11221 HOST_WIDE_INT scale;
11223 if (ix86_decompose_address (addr, &parts) <= 0)
11224 /* Decomposition failed. */
11228 index = parts.index;
11230 scale = parts.scale;
11232 /* Validate base register.
11234 Don't allow SUBREG's that span more than a word here. It can lead to spill
11235 failures when the base is one word out of a two word structure, which is
11236 represented internally as a DImode int. */
11244 else if (GET_CODE (base) == SUBREG
11245 && REG_P (SUBREG_REG (base))
11246 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11248 reg = SUBREG_REG (base);
11250 /* Base is not a register. */
11253 if (GET_MODE (base) != Pmode)
11254 /* Base is not in Pmode. */
11257 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11258 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11259 /* Base is not valid. */
11263 /* Validate index register.
11265 Don't allow SUBREG's that span more than a word here -- same as above. */
11273 else if (GET_CODE (index) == SUBREG
11274 && REG_P (SUBREG_REG (index))
11275 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11277 reg = SUBREG_REG (index);
11279 /* Index is not a register. */
11282 if (GET_MODE (index) != Pmode)
11283 /* Index is not in Pmode. */
11286 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11287 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11288 /* Index is not valid. */
11292 /* Validate scale factor. */
11296 /* Scale without index. */
11299 if (scale != 2 && scale != 4 && scale != 8)
11300 /* Scale is not a valid multiplier. */
11304 /* Validate displacement. */
11307 if (GET_CODE (disp) == CONST
11308 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11309 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11310 switch (XINT (XEXP (disp, 0), 1))
      /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	 when used.  While the ABI also specifies 32bit relocations, we
	 don't produce them at all and use IP-relative addressing
	 instead.  */
11316 case UNSPEC_GOTOFF:
11317 gcc_assert (flag_pic);
11319 goto is_legitimate_pic;
11321 /* 64bit address unspec. */
11324 case UNSPEC_GOTPCREL:
11325 gcc_assert (flag_pic);
11326 goto is_legitimate_pic;
11328 case UNSPEC_GOTTPOFF:
11329 case UNSPEC_GOTNTPOFF:
11330 case UNSPEC_INDNTPOFF:
11331 case UNSPEC_NTPOFF:
11332 case UNSPEC_DTPOFF:
11335 case UNSPEC_STACK_CHECK:
11336 gcc_assert (flag_split_stack);
11340 /* Invalid address unspec. */
11344 else if (SYMBOLIC_CONST (disp)
11348 && MACHOPIC_INDIRECT
11349 && !machopic_operand_p (disp)
11355 if (TARGET_64BIT && (index || base))
11357 /* foo@dtpoff(%rX) is ok. */
11358 if (GET_CODE (disp) != CONST
11359 || GET_CODE (XEXP (disp, 0)) != PLUS
11360 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11361 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11362 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11363 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11364 /* Non-constant pic memory reference. */
11367 else if (! legitimate_pic_address_disp_p (disp))
11368 /* Displacement is an invalid pic construct. */
11371 /* This code used to verify that a symbolic pic displacement
11372 includes the pic_offset_table_rtx register.
	 While this is a good idea, unfortunately these constructs may
	 be created by the "adds using lea" optimization for incorrect
	 code like:

	 int a;
	 int foo(int i)
	   {
	     return *(&a+i);
	   }

	 This code is nonsensical, but results in addressing the
	 GOT table with a pic_offset_table_rtx base.  We can't
	 just refuse it easily, since it gets matched by the
	 "addsi3" pattern, which later gets split to lea when the
	 output register differs from the input.  While this
	 could be handled by a separate addsi pattern for this case
	 that never results in lea, disabling this test seems to be
	 the easier and correct fix for the crash.  */
11393 else if (GET_CODE (disp) != LABEL_REF
11394 && !CONST_INT_P (disp)
11395 && (GET_CODE (disp) != CONST
11396 || !legitimate_constant_p (disp))
11397 && (GET_CODE (disp) != SYMBOL_REF
11398 || !legitimate_constant_p (disp)))
11399 /* Displacement is not constant. */
11401 else if (TARGET_64BIT
11402 && !x86_64_immediate_operand (disp, VOIDmode))
11403 /* Displacement is out of range. */
11407 /* Everything looks valid. */
11411 /* Determine if a given RTX is a valid constant address. */
11414 constant_address_p (rtx x)
11416 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11419 /* Return a unique alias set for the GOT. */
11421 static alias_set_type
11422 ix86_GOT_alias_set (void)
11424 static alias_set_type set = -1;
11426 set = new_alias_set ();
11430 /* Return a legitimate reference for ORIG (an address) using the
11431 register REG. If REG is 0, a new pseudo is generated.
11433 There are two types of references that must be handled:
11435 1. Global data references must load the address from the GOT, via
11436 the PIC reg. An insn is emitted to do this load, and the reg is
11439 2. Static data references, constant pool addresses, and code labels
11440 compute the address as an offset from the GOT, whose base is in
11441 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11442 differentiate them from global data objects. The returned
11443 address is the PIC reg + an unspec constant.
11445 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11446 reg also appears in the address. */
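/* As a sketch of the resulting 32-bit sequences (illustrative; the
   register choice and the 64-bit variants differ):

     global data:  movl  foo@GOT(%ebx), %reg     # load address from GOT
     local data:   leal  foo@GOTOFF(%ebx), %reg  # PIC reg + offset  */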
11449 legitimize_pic_address (rtx orig, rtx reg)
11452 rtx new_rtx = orig;
11456 if (TARGET_MACHO && !TARGET_64BIT)
11459 reg = gen_reg_rtx (Pmode);
11460 /* Use the generic Mach-O PIC machinery. */
11461 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11465 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11467 else if (TARGET_64BIT
11468 && ix86_cmodel != CM_SMALL_PIC
11469 && gotoff_operand (addr, Pmode))
11472 /* This symbol may be referenced via a displacement from the PIC
11473 base address (@GOTOFF). */
11475 if (reload_in_progress)
11476 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11477 if (GET_CODE (addr) == CONST)
11478 addr = XEXP (addr, 0);
11479 if (GET_CODE (addr) == PLUS)
11481 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11483 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11486 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11487 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11489 tmpreg = gen_reg_rtx (Pmode);
11492 emit_move_insn (tmpreg, new_rtx);
11496 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11497 tmpreg, 1, OPTAB_DIRECT);
11500 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11502 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11504 /* This symbol may be referenced via a displacement from the PIC
11505 base address (@GOTOFF). */
11507 if (reload_in_progress)
11508 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11509 if (GET_CODE (addr) == CONST)
11510 addr = XEXP (addr, 0);
11511 if (GET_CODE (addr) == PLUS)
11513 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11515 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11518 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11519 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11520 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11524 emit_move_insn (reg, new_rtx);
11528 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11529 /* We can't use @GOTOFF for text labels on VxWorks;
11530 see gotoff_operand. */
11531 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11533 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11535 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11536 return legitimize_dllimport_symbol (addr, true);
11537 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11538 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11539 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11541 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11542 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11546 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11548 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11549 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11550 new_rtx = gen_const_mem (Pmode, new_rtx);
11551 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11554 reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
11558 emit_insn (gen_movsi (reg, new_rtx));
11563 /* This symbol must be referenced via a load from the
11564 Global Offset Table (@GOT). */
11566 if (reload_in_progress)
11567 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11568 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11569 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11571 new_rtx = force_reg (Pmode, new_rtx);
11572 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11573 new_rtx = gen_const_mem (Pmode, new_rtx);
11574 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11577 reg = gen_reg_rtx (Pmode);
11578 emit_move_insn (reg, new_rtx);
11584 if (CONST_INT_P (addr)
11585 && !x86_64_immediate_operand (addr, VOIDmode))
11589 emit_move_insn (reg, addr);
11593 new_rtx = force_reg (Pmode, addr);
11595 else if (GET_CODE (addr) == CONST)
11597 addr = XEXP (addr, 0);
11599 /* We must match stuff we generate before. Assume the only
11600 unspecs that can get here are ours. Not that we could do
11601 anything with them anyway.... */
11602 if (GET_CODE (addr) == UNSPEC
11603 || (GET_CODE (addr) == PLUS
11604 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11606 gcc_assert (GET_CODE (addr) == PLUS);
11608 if (GET_CODE (addr) == PLUS)
11610 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11612 /* Check first to see if this is a constant offset from a @GOTOFF
11613 symbol reference. */
11614 if (gotoff_operand (op0, Pmode)
11615 && CONST_INT_P (op1))
11619 if (reload_in_progress)
11620 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11621 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11623 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11624 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11625 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11629 emit_move_insn (reg, new_rtx);
11635 if (INTVAL (op1) < -16*1024*1024
11636 || INTVAL (op1) >= 16*1024*1024)
11638 if (!x86_64_immediate_operand (op1, Pmode))
11639 op1 = force_reg (Pmode, op1);
11640 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11646 base = legitimize_pic_address (XEXP (addr, 0), reg);
11647 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11648 base == reg ? NULL_RTX : reg);
11650 if (CONST_INT_P (new_rtx))
11651 new_rtx = plus_constant (base, INTVAL (new_rtx));
11654 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11656 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11657 new_rtx = XEXP (new_rtx, 1);
11659 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11667 /* Load the thread pointer. If TO_REG is true, force it into a register. */
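/* On GNU/Linux targets this typically materializes as a %gs-based
   (32-bit) or %fs-based (64-bit) access; the UNSPEC_TP wrapper is what
   ix86_decompose_address recognizes when folding the thread pointer
   into a segment-prefixed address.  (A descriptive note.)  */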
11670 get_thread_pointer (int to_reg)
11674 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11678 reg = gen_reg_rtx (Pmode);
11679 insn = gen_rtx_SET (VOIDmode, reg, tp);
11680 insn = emit_insn (insn);
11685 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11686 false if we expect this to be used for a memory address and true if
11687 we expect to load the address into a register. */
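/* For reference, a rough map from TLS model to the relocations used
   below (a sketch; the exact sequence varies with TARGET_GNU2_TLS and
   TARGET_64BIT):

     global dynamic: @tlsgd-style call to the tls_get_addr helper
     local dynamic:  base via the helper, then symbol@dtpoff
     initial exec:   load symbol@gottpoff / @gotntpoff from the GOT
     local exec:     symbol@tpoff / @ntpoff from the thread pointer  */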
11690 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11692 rtx dest, base, off, pic, tp;
11697 case TLS_MODEL_GLOBAL_DYNAMIC:
11698 dest = gen_reg_rtx (Pmode);
11699 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11701 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11703 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11706 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11707 insns = get_insns ();
11710 RTL_CONST_CALL_P (insns) = 1;
11711 emit_libcall_block (insns, dest, rax, x);
11713 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11714 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11716 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11718 if (TARGET_GNU2_TLS)
11720 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11722 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11726 case TLS_MODEL_LOCAL_DYNAMIC:
11727 base = gen_reg_rtx (Pmode);
11728 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11730 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11732 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11735 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11736 insns = get_insns ();
11739 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11740 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11741 RTL_CONST_CALL_P (insns) = 1;
11742 emit_libcall_block (insns, base, rax, note);
11744 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11745 emit_insn (gen_tls_local_dynamic_base_64 (base));
11747 emit_insn (gen_tls_local_dynamic_base_32 (base));
11749 if (TARGET_GNU2_TLS)
11751 rtx x = ix86_tls_module_base ();
11753 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11754 gen_rtx_MINUS (Pmode, x, tp));
11757 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11758 off = gen_rtx_CONST (Pmode, off);
11760 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11762 if (TARGET_GNU2_TLS)
11764 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11766 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11771 case TLS_MODEL_INITIAL_EXEC:
11775 type = UNSPEC_GOTNTPOFF;
11779 if (reload_in_progress)
11780 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11781 pic = pic_offset_table_rtx;
11782 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11784 else if (!TARGET_ANY_GNU_TLS)
11786 pic = gen_reg_rtx (Pmode);
11787 emit_insn (gen_set_got (pic));
11788 type = UNSPEC_GOTTPOFF;
11793 type = UNSPEC_INDNTPOFF;
11796 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11797 off = gen_rtx_CONST (Pmode, off);
11799 off = gen_rtx_PLUS (Pmode, pic, off);
11800 off = gen_const_mem (Pmode, off);
11801 set_mem_alias_set (off, ix86_GOT_alias_set ());
11803 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11805 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11806 off = force_reg (Pmode, off);
11807 return gen_rtx_PLUS (Pmode, base, off);
11811 base = get_thread_pointer (true);
11812 dest = gen_reg_rtx (Pmode);
11813 emit_insn (gen_subsi3 (dest, base, off));
11817 case TLS_MODEL_LOCAL_EXEC:
11818 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11819 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11820 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11821 off = gen_rtx_CONST (Pmode, off);
11823 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11825 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11826 return gen_rtx_PLUS (Pmode, base, off);
11830 base = get_thread_pointer (true);
11831 dest = gen_reg_rtx (Pmode);
11832 emit_insn (gen_subsi3 (dest, base, off));
11837 gcc_unreachable ();
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to DECL.  */
11846 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11847 htab_t dllimport_map;
11850 get_dllimport_decl (tree decl)
11852 struct tree_map *h, in;
11855 const char *prefix;
11856 size_t namelen, prefixlen;
11861 if (!dllimport_map)
11862 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11864 in.hash = htab_hash_pointer (decl);
11865 in.base.from = decl;
11866 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11867 h = (struct tree_map *) *loc;
11871 *loc = h = ggc_alloc_tree_map ();
11873 h->base.from = decl;
11874 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11875 VAR_DECL, NULL, ptr_type_node);
11876 DECL_ARTIFICIAL (to) = 1;
11877 DECL_IGNORED_P (to) = 1;
11878 DECL_EXTERNAL (to) = 1;
11879 TREE_READONLY (to) = 1;
11881 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11882 name = targetm.strip_name_encoding (name);
11883 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11884 ? "*__imp_" : "*__imp__";
11885 namelen = strlen (name);
11886 prefixlen = strlen (prefix);
11887 imp_name = (char *) alloca (namelen + prefixlen + 1);
11888 memcpy (imp_name, prefix, prefixlen);
11889 memcpy (imp_name + prefixlen, name, namelen + 1);
11891 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11892 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11893 SET_SYMBOL_REF_DECL (rtl, to);
11894 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11896 rtl = gen_const_mem (Pmode, rtl);
11897 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11899 SET_DECL_RTL (to, rtl);
11900 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11905 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11906 true if we require the result be a register. */
11909 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11914 gcc_assert (SYMBOL_REF_DECL (symbol));
11915 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11917 x = DECL_RTL (imp_decl);
11919 x = force_reg (Pmode, x);
11923 /* Try machine-dependent ways of modifying an illegitimate address
11924 to be legitimate. If we find one, return the new, valid address.
11925 This macro is used in only one place: `memory_address' in explow.c.
11927 OLDX is the address as it was before break_out_memory_refs was called.
11928 In some cases it is useful to look at this to decide what needs to be done.
11930 It is always safe for this macro to do nothing. It exists to recognize
11931 opportunities to optimize the output.
11933 For the 80386, we handle X+REG by loading X into a register R and
11934 using R+REG. R will go in a general reg and indexing will be used.
11935 However, if REG is a broken-out memory address or multiplication,
11936 nothing needs to be done because REG can certainly go in a general reg.
11938 When -fpic is used, special handling is needed for symbolic references.
11939 See comments by legitimize_pic_address in i386.c for details. */
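/* For example (illustrative): (plus (ashift (reg) (const_int 2)) (reg))
   is rewritten below into (plus (mult (reg) (const_int 4)) (reg)), the
   canonical base + index*scale form that ix86_decompose_address
   expects.  */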
11942 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11943 enum machine_mode mode)
11948 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11950 return legitimize_tls_address (x, (enum tls_model) log, false);
11951 if (GET_CODE (x) == CONST
11952 && GET_CODE (XEXP (x, 0)) == PLUS
11953 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11954 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11956 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11957 (enum tls_model) log, false);
11958 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11961 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11963 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11964 return legitimize_dllimport_symbol (x, true);
11965 if (GET_CODE (x) == CONST
11966 && GET_CODE (XEXP (x, 0)) == PLUS
11967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11968 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11970 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11971 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11975 if (flag_pic && SYMBOLIC_CONST (x))
11976 return legitimize_pic_address (x, 0);
11978 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11979 if (GET_CODE (x) == ASHIFT
11980 && CONST_INT_P (XEXP (x, 1))
11981 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11984 log = INTVAL (XEXP (x, 1));
11985 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11986 GEN_INT (1 << log));
11989 if (GET_CODE (x) == PLUS)
11991 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11993 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11994 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11995 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11998 log = INTVAL (XEXP (XEXP (x, 0), 1));
11999 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12000 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12001 GEN_INT (1 << log));
12004 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12005 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12006 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12009 log = INTVAL (XEXP (XEXP (x, 1), 1));
12010 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12011 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12012 GEN_INT (1 << log));
12015 /* Put multiply first if it isn't already. */
12016 if (GET_CODE (XEXP (x, 1)) == MULT)
12018 rtx tmp = XEXP (x, 0);
12019 XEXP (x, 0) = XEXP (x, 1);
12024 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12025 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12026 created by virtual register instantiation, register elimination, and
12027 similar optimizations. */
12028 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12031 x = gen_rtx_PLUS (Pmode,
12032 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12033 XEXP (XEXP (x, 1), 0)),
12034 XEXP (XEXP (x, 1), 1));
      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
12040 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12041 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12042 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12043 && CONSTANT_P (XEXP (x, 1)))
12046 rtx other = NULL_RTX;
12048 if (CONST_INT_P (XEXP (x, 1)))
12050 constant = XEXP (x, 1);
12051 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12053 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12055 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12056 other = XEXP (x, 1);
12064 x = gen_rtx_PLUS (Pmode,
12065 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12066 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12067 plus_constant (other, INTVAL (constant)));
12071 if (changed && ix86_legitimate_address_p (mode, x, false))
12074 if (GET_CODE (XEXP (x, 0)) == MULT)
12077 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12080 if (GET_CODE (XEXP (x, 1)) == MULT)
12083 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12087 && REG_P (XEXP (x, 1))
12088 && REG_P (XEXP (x, 0)))
12091 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12094 x = legitimize_pic_address (x, 0);
12097 if (changed && ix86_legitimate_address_p (mode, x, false))
12100 if (REG_P (XEXP (x, 0)))
12102 rtx temp = gen_reg_rtx (Pmode);
12103 rtx val = force_operand (XEXP (x, 1), temp);
12105 emit_move_insn (temp, val);
12107 XEXP (x, 1) = temp;
12111 else if (REG_P (XEXP (x, 1)))
12113 rtx temp = gen_reg_rtx (Pmode);
12114 rtx val = force_operand (XEXP (x, 0), temp);
12116 emit_move_insn (temp, val);
12118 XEXP (x, 0) = temp;
12126 /* Print an integer constant expression in assembler syntax. Addition
12127 and subtraction are the only arithmetic that may appear in these
12128 expressions. FILE is the stdio stream to write to, X is the rtx, and
12129 CODE is the operand print code from the output string. */
12132 output_pic_addr_const (FILE *file, rtx x, int code)
12136 switch (GET_CODE (x))
12139 gcc_assert (flag_pic);
12144 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12145 output_addr_const (file, x);
12148 const char *name = XSTR (x, 0);
12150 /* Mark the decl as referenced so that cgraph will
12151 output the function. */
12152 if (SYMBOL_REF_DECL (x))
12153 mark_decl_referenced (SYMBOL_REF_DECL (x));
12156 if (MACHOPIC_INDIRECT
12157 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12158 name = machopic_indirection_name (x, /*stub_p=*/true);
12160 assemble_name (file, name);
12162 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12163 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12164 fputs ("@PLT", file);
12171 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12172 assemble_name (asm_out_file, buf);
12176 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12180 /* This used to output parentheses around the expression,
12181 but that does not work on the 386 (either ATT or BSD assembler). */
12182 output_pic_addr_const (file, XEXP (x, 0), code);
12186 if (GET_MODE (x) == VOIDmode)
12188 /* We can use %d if the number is <32 bits and positive. */
12189 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12190 fprintf (file, "0x%lx%08lx",
12191 (unsigned long) CONST_DOUBLE_HIGH (x),
12192 (unsigned long) CONST_DOUBLE_LOW (x));
12194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12197 /* We can't handle floating point constants;
12198 TARGET_PRINT_OPERAND must handle them. */
12199 output_operand_lossage ("floating constant misused");
12203 /* Some assemblers need integer constants to appear first. */
12204 if (CONST_INT_P (XEXP (x, 0)))
12206 output_pic_addr_const (file, XEXP (x, 0), code);
12208 output_pic_addr_const (file, XEXP (x, 1), code);
12212 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12213 output_pic_addr_const (file, XEXP (x, 1), code);
12215 output_pic_addr_const (file, XEXP (x, 0), code);
12221 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12222 output_pic_addr_const (file, XEXP (x, 0), code);
12224 output_pic_addr_const (file, XEXP (x, 1), code);
12226 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12230 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12232 bool f = i386_asm_output_addr_const_extra (file, x);
12237 gcc_assert (XVECLEN (x, 0) == 1);
12238 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12239 switch (XINT (x, 1))
12242 fputs ("@GOT", file);
12244 case UNSPEC_GOTOFF:
12245 fputs ("@GOTOFF", file);
12247 case UNSPEC_PLTOFF:
12248 fputs ("@PLTOFF", file);
12250 case UNSPEC_GOTPCREL:
12251 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12252 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12254 case UNSPEC_GOTTPOFF:
12255 /* FIXME: This might be @TPOFF in Sun ld too. */
12256 fputs ("@gottpoff", file);
12259 fputs ("@tpoff", file);
12261 case UNSPEC_NTPOFF:
12263 fputs ("@tpoff", file);
12265 fputs ("@ntpoff", file);
12267 case UNSPEC_DTPOFF:
12268 fputs ("@dtpoff", file);
12270 case UNSPEC_GOTNTPOFF:
12272 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12273 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12275 fputs ("@gotntpoff", file);
12277 case UNSPEC_INDNTPOFF:
12278 fputs ("@indntpoff", file);
12281 case UNSPEC_MACHOPIC_OFFSET:
12283 machopic_output_function_base_name (file);
12287 output_operand_lossage ("invalid UNSPEC as operand");
12293 output_operand_lossage ("invalid expression as operand");
12297 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12298 We need to emit DTP-relative relocations. */
12300 static void ATTRIBUTE_UNUSED
12301 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12303 fputs (ASM_LONG, file);
12304 output_addr_const (file, x);
12305 fputs ("@dtpoff", file);
12311 fputs (", 0", file);
12314 gcc_unreachable ();
12318 /* Return true if X is a representation of the PIC register. This copes
12319 with calls from ix86_find_base_term, where the register might have
12320 been replaced by a cselib value. */
12323 ix86_pic_register_p (rtx x)
12325 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12326 return (pic_offset_table_rtx
12327 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12329 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12332 /* In the name of slightly smaller debug output, and to cater to
12333 general assembler lossage, recognize PIC+GOTOFF and turn it back
12334 into a direct symbol reference.
12336 On Darwin, this is necessary to avoid a crash, because Darwin
12337 has a different PIC label for each routine but the DWARF debugging
12338 information is not associated with any particular routine, so it's
12339 necessary to remove references to the PIC label from RTL stored by
12340 the DWARF output code. */
12343 ix86_delegitimize_address (rtx x)
12345 rtx orig_x = delegitimize_mem_from_attrs (x);
12346 /* addend is NULL or some rtx if x is something+GOTOFF where
12347 something doesn't include the PIC register. */
12348 rtx addend = NULL_RTX;
12349 /* reg_addend is NULL or a multiple of some register. */
12350 rtx reg_addend = NULL_RTX;
12351 /* const_addend is NULL or a const_int. */
12352 rtx const_addend = NULL_RTX;
12353 /* This is the result, or NULL. */
12354 rtx result = NULL_RTX;
12363 if (GET_CODE (x) != CONST
12364 || GET_CODE (XEXP (x, 0)) != UNSPEC
12365 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12366 || !MEM_P (orig_x))
12368 x = XVECEXP (XEXP (x, 0), 0, 0);
12369 if (GET_MODE (orig_x) != Pmode)
12370 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12374 if (GET_CODE (x) != PLUS
12375 || GET_CODE (XEXP (x, 1)) != CONST)
12378 if (ix86_pic_register_p (XEXP (x, 0)))
12379 /* %ebx + GOT/GOTOFF */
12381 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12383 /* %ebx + %reg * scale + GOT/GOTOFF */
12384 reg_addend = XEXP (x, 0);
12385 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12386 reg_addend = XEXP (reg_addend, 1);
12387 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12388 reg_addend = XEXP (reg_addend, 0);
12391 reg_addend = NULL_RTX;
12392 addend = XEXP (x, 0);
12396 addend = XEXP (x, 0);
12398 x = XEXP (XEXP (x, 1), 0);
12399 if (GET_CODE (x) == PLUS
12400 && CONST_INT_P (XEXP (x, 1)))
12402 const_addend = XEXP (x, 1);
12406 if (GET_CODE (x) == UNSPEC
12407 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12408 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12409 result = XVECEXP (x, 0, 0);
12411 if (TARGET_MACHO && darwin_local_data_pic (x)
12412 && !MEM_P (orig_x))
12413 result = XVECEXP (x, 0, 0);
12419 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12421 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12424 /* If the rest of original X doesn't involve the PIC register, add
12425 addend and subtract pic_offset_table_rtx. This can happen e.g.
12427 leal (%ebx, %ecx, 4), %ecx
12429 movl foo@GOTOFF(%ecx), %edx
12430 in which case we return (%ecx - %ebx) + foo. */
12431 if (pic_offset_table_rtx)
12432 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12433 pic_offset_table_rtx),
12438 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12439 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12443 /* If X is a machine specific address (i.e. a symbol or label being
12444 referenced as a displacement from the GOT implemented using an
12445 UNSPEC), then return the base term. Otherwise return X. */
12448 ix86_find_base_term (rtx x)
12454 if (GET_CODE (x) != CONST)
12456 term = XEXP (x, 0);
12457 if (GET_CODE (term) == PLUS
12458 && (CONST_INT_P (XEXP (term, 1))
12459 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12460 term = XEXP (term, 0);
12461 if (GET_CODE (term) != UNSPEC
12462 || XINT (term, 1) != UNSPEC_GOTPCREL)
12465 return XVECEXP (term, 0, 0);
12468 return ix86_delegitimize_address (x);
12472 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12473 int fp, FILE *file)
12475 const char *suffix;
12477 if (mode == CCFPmode || mode == CCFPUmode)
12479 code = ix86_fp_compare_code_to_integer (code);
12483 code = reverse_condition (code);
12534 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12538 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12539 Those same assemblers have the same but opposite lossage on cmov. */
12540 if (mode == CCmode)
12541 suffix = fp ? "nbe" : "a";
12542 else if (mode == CCCmode)
12545 gcc_unreachable ();
12561 gcc_unreachable ();
12565 gcc_assert (mode == CCmode || mode == CCCmode);
12582 gcc_unreachable ();
12586 /* ??? As above. */
12587 gcc_assert (mode == CCmode || mode == CCCmode);
12588 suffix = fp ? "nb" : "ae";
12591 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12595 /* ??? As above. */
12596 if (mode == CCmode)
12598 else if (mode == CCCmode)
12599 suffix = fp ? "nb" : "ae";
12601 gcc_unreachable ();
12604 suffix = fp ? "u" : "p";
12607 suffix = fp ? "nu" : "np";
12610 gcc_unreachable ();
12612 fputs (suffix, file);
12615 /* Print the name of register X to FILE based on its machine mode and number.
12616 If CODE is 'w', pretend the mode is HImode.
12617 If CODE is 'b', pretend the mode is QImode.
12618 If CODE is 'k', pretend the mode is SImode.
12619 If CODE is 'q', pretend the mode is DImode.
12620 If CODE is 'x', pretend the mode is V4SFmode.
12621 If CODE is 't', pretend the mode is V8SFmode.
12622 If CODE is 'h', pretend the reg is the 'high' byte register.
12623 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12624 If CODE is 'd', duplicate the operand for AVX instruction.
12628 print_reg (rtx x, int code, FILE *file)
12631 bool duplicated = code == 'd' && TARGET_AVX;
12633 gcc_assert (x == pc_rtx
12634 || (REGNO (x) != ARG_POINTER_REGNUM
12635 && REGNO (x) != FRAME_POINTER_REGNUM
12636 && REGNO (x) != FLAGS_REG
12637 && REGNO (x) != FPSR_REG
12638 && REGNO (x) != FPCR_REG));
12640 if (ASSEMBLER_DIALECT == ASM_ATT)
12645 gcc_assert (TARGET_64BIT);
12646 fputs ("rip", file);
12650 if (code == 'w' || MMX_REG_P (x))
12652 else if (code == 'b')
12654 else if (code == 'k')
12656 else if (code == 'q')
12658 else if (code == 'y')
12660 else if (code == 'h')
12662 else if (code == 'x')
12664 else if (code == 't')
12667 code = GET_MODE_SIZE (GET_MODE (x));
12669 /* Irritatingly, AMD extended registers use a different naming convention
12670 from the normal registers.  */
12671 if (REX_INT_REG_P (x))
12673 gcc_assert (TARGET_64BIT);
12677 error ("extended registers have no high halves");
12680 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12683 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12686 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12689 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12692 error ("unsupported operand size for extended register");
12702 if (STACK_TOP_P (x))
12711 if (! ANY_FP_REG_P (x))
12712 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12717 reg = hi_reg_name[REGNO (x)];
12720 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12722 reg = qi_reg_name[REGNO (x)];
12725 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12727 reg = qi_high_reg_name[REGNO (x)];
12732 gcc_assert (!duplicated);
12734 fputs (hi_reg_name[REGNO (x)] + 1, file);
12739 gcc_unreachable ();
12745 if (ASSEMBLER_DIALECT == ASM_ATT)
12746 fprintf (file, ", %%%s", reg);
12748 fprintf (file, ", %s", reg);
12752 /* Locate some local-dynamic symbol still in use by this function
12753 so that we can print its name in some tls_local_dynamic_base pattern.  */
12757 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12761 if (GET_CODE (x) == SYMBOL_REF
12762 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12764 cfun->machine->some_ld_name = XSTR (x, 0);
12771 static const char *
12772 get_some_local_dynamic_name (void)
12776 if (cfun->machine->some_ld_name)
12777 return cfun->machine->some_ld_name;
12779 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12780 if (NONDEBUG_INSN_P (insn)
12781 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12782 return cfun->machine->some_ld_name;
12787 /* Meaning of CODE:
12788 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12789 C -- print opcode suffix for set/cmov insn.
12790 c -- like C, but print reversed condition
12791 F,f -- likewise, but for floating-point.
12792 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12794 R -- print the prefix for register names.
12795 z -- print the opcode suffix for the size of the current operand.
12796 Z -- likewise, with special suffixes for x87 instructions.
12797 * -- print a star (in certain assembler syntax)
12798 A -- print an absolute memory reference.
12799 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12800 s -- print a shift double count, followed by the assembler's argument delimiter.
12802 b -- print the QImode name of the register for the indicated operand.
12803 %b0 would print %al if operands[0] is reg 0.
12804 w -- likewise, print the HImode name of the register.
12805 k -- likewise, print the SImode name of the register.
12806 q -- likewise, print the DImode name of the register.
12807 x -- likewise, print the V4SFmode name of the register.
12808 t -- likewise, print the V8SFmode name of the register.
12809 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12810 y -- print "st(0)" instead of "st" as a register.
12811 d -- print duplicated register operand for AVX instruction.
12812 D -- print condition for SSE cmp instruction.
12813 P -- if PIC, print an @PLT suffix.
12814 X -- don't print any sort of PIC '@' suffix for a symbol.
12815 & -- print some in-use local-dynamic symbol name.
12816 H -- print a memory address offset by 8; used for sse high-parts
12817 Y -- print condition for XOP pcom* instruction.
12818 + -- print a branch hint as 'cs' or 'ds' prefix
12819 ; -- print a semicolon (after prefixes, due to a bug in older gas).
12820 @ -- print the segment register of a thread base pointer load
12824 ix86_print_operand (FILE *file, rtx x, int code)
12831 if (ASSEMBLER_DIALECT == ASM_ATT)
12837 const char *name = get_some_local_dynamic_name ();
12839 output_operand_lossage ("'%%&' used without any "
12840 "local dynamic TLS references");
12842 assemble_name (file, name);
12847 switch (ASSEMBLER_DIALECT)
12854 /* Intel syntax. For absolute addresses, registers should not
12855 be surrounded by braces. */
12859 ix86_print_operand (file, x, 0);
12866 gcc_unreachable ();
12869 ix86_print_operand (file, x, 0);
12874 if (ASSEMBLER_DIALECT == ASM_ATT)
12879 if (ASSEMBLER_DIALECT == ASM_ATT)
12884 if (ASSEMBLER_DIALECT == ASM_ATT)
12889 if (ASSEMBLER_DIALECT == ASM_ATT)
12894 if (ASSEMBLER_DIALECT == ASM_ATT)
12899 if (ASSEMBLER_DIALECT == ASM_ATT)
12904 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12906 /* Opcodes don't get size suffixes if using Intel opcodes. */
12907 if (ASSEMBLER_DIALECT == ASM_INTEL)
12910 switch (GET_MODE_SIZE (GET_MODE (x)))
12929 output_operand_lossage
12930 ("invalid operand size for operand code '%c'", code);
12935 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12936 warning
12937 (0, "non-integer operand used with operand code '%c'", code);
12941 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12942 if (ASSEMBLER_DIALECT == ASM_INTEL)
12945 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12947 switch (GET_MODE_SIZE (GET_MODE (x)))
12950 #ifdef HAVE_AS_IX86_FILDS
12960 #ifdef HAVE_AS_IX86_FILDQ
12963 fputs ("ll", file);
12971 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12973 /* 387 opcodes don't get size suffixes
12974 if the operands are registers. */
12975 if (STACK_REG_P (x))
12978 switch (GET_MODE_SIZE (GET_MODE (x)))
12999 output_operand_lossage
13000 ("invalid operand type used with operand code '%c'", code);
13004 output_operand_lossage
13005 ("invalid operand size for operand code '%c'", code);
13022 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13024 ix86_print_operand (file, x, 0);
13025 fputs (", ", file);
13030 /* A little bit of brain damage here.  The SSE compare instructions
13031 use completely different names for the comparisons than the
13032 fp conditional moves do.  */
13035 switch (GET_CODE (x))
13038 fputs ("eq", file);
13041 fputs ("eq_us", file);
13044 fputs ("lt", file);
13047 fputs ("nge", file);
13050 fputs ("le", file);
13053 fputs ("ngt", file);
13056 fputs ("unord", file);
13059 fputs ("neq", file);
13062 fputs ("neq_oq", file);
13065 fputs ("ge", file);
13068 fputs ("nlt", file);
13071 fputs ("gt", file);
13074 fputs ("nle", file);
13077 fputs ("ord", file);
13080 output_operand_lossage ("operand is not a condition code, "
13081 "invalid operand code 'D'");
13087 switch (GET_CODE (x))
13091 fputs ("eq", file);
13095 fputs ("lt", file);
13099 fputs ("le", file);
13102 fputs ("unord", file);
13106 fputs ("neq", file);
13110 fputs ("nlt", file);
13114 fputs ("nle", file);
13117 fputs ("ord", file);
13120 output_operand_lossage ("operand is not a condition code, "
13121 "invalid operand code 'D'");
13127 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13128 if (ASSEMBLER_DIALECT == ASM_ATT)
13130 switch (GET_MODE (x))
13132 case HImode: putc ('w', file); break;
13134 case SFmode: putc ('l', file); break;
13136 case DFmode: putc ('q', file); break;
13137 default: gcc_unreachable ();
13144 if (!COMPARISON_P (x))
13146 output_operand_lossage ("operand is neither a constant nor a "
13147 "condition code, invalid operand code "
13151 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13154 if (!COMPARISON_P (x))
13156 output_operand_lossage ("operand is neither a constant nor a "
13157 "condition code, invalid operand code "
13161 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13162 if (ASSEMBLER_DIALECT == ASM_ATT)
13165 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13168 /* Like above, but reverse condition */
13170 /* Check to see if the argument to %c is really a constant
13171 and not a condition code that needs to be reversed.  */
13172 if (!COMPARISON_P (x))
13174 output_operand_lossage ("operand is neither a constant nor a "
13175 "condition code, invalid operand "
13179 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13182 if (!COMPARISON_P (x))
13184 output_operand_lossage ("operand is neither a constant nor a "
13185 "condition code, invalid operand "
13189 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13190 if (ASSEMBLER_DIALECT == ASM_ATT)
13193 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13197 /* It doesn't actually matter what mode we use here, as we're
13198 only going to use this for printing. */
13199 x = adjust_address_nv (x, DImode, 8);
13207 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13210 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13213 int pred_val = INTVAL (XEXP (x, 0));
13215 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13216 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13218 int taken = pred_val > REG_BR_PROB_BASE / 2;
13219 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13221 /* Emit hints only in cases where the default branch prediction
13222 heuristics would fail.  */
13223 if (taken != cputaken)
13225 /* We use 3e (DS) prefix for taken branches and
13226 2e (CS) prefix for not taken branches. */
13228 fputs ("ds ; ", file);
13230 fputs ("cs ; ", file);
13238 switch (GET_CODE (x))
13241 fputs ("neq", file);
13244 fputs ("eq", file);
13248 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13252 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13256 fputs ("le", file);
13260 fputs ("lt", file);
13263 fputs ("unord", file);
13266 fputs ("ord", file);
13269 fputs ("ueq", file);
13272 fputs ("nlt", file);
13275 fputs ("nle", file);
13278 fputs ("ule", file);
13281 fputs ("ult", file);
13284 fputs ("une", file);
13287 output_operand_lossage ("operand is not a condition code, "
13288 "invalid operand code 'Y'");
13294 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13300 if (ASSEMBLER_DIALECT == ASM_ATT)
13303 /* The kernel uses a different segment register for performance
13304 reasons: this way a system call does not have to trash the userspace
13305 segment register, which would be expensive.  */
13306 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13307 fputs ("fs", file);
13309 fputs ("gs", file);
13313 output_operand_lossage ("invalid operand code '%c'", code);
13318 print_reg (x, code, file);
13320 else if (MEM_P (x))
13322 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13323 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13324 && GET_MODE (x) != BLKmode)
13327 switch (GET_MODE_SIZE (GET_MODE (x)))
13329 case 1: size = "BYTE"; break;
13330 case 2: size = "WORD"; break;
13331 case 4: size = "DWORD"; break;
13332 case 8: size = "QWORD"; break;
13333 case 12: size = "TBYTE"; break;
13335 if (GET_MODE (x) == XFmode)
13340 case 32: size = "YMMWORD"; break;
13342 gcc_unreachable ();
13345 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13348 else if (code == 'w')
13350 else if (code == 'k')
13353 fputs (size, file);
13354 fputs (" PTR ", file);
13358 /* Avoid (%rip) for call operands. */
13359 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13360 && !CONST_INT_P (x))
13361 output_addr_const (file, x);
13362 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13363 output_operand_lossage ("invalid constraints for operand");
13365 output_address (x);
13368 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13373 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13374 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13376 if (ASSEMBLER_DIALECT == ASM_ATT)
13378 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13380 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13382 fprintf (file, "0x%08x", (unsigned int) l);
13385 /* These float cases don't actually occur as immediate operands. */
13386 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13390 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13391 fputs (dstr, file);
13394 else if (GET_CODE (x) == CONST_DOUBLE
13395 && GET_MODE (x) == XFmode)
13399 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13400 fputs (dstr, file);
13405 /* We have patterns that allow zero sets of memory, for instance.
13406 In 64-bit mode, we should probably support all 8-byte vectors,
13407 since we can in fact encode that into an immediate. */
13408 if (GET_CODE (x) == CONST_VECTOR)
13410 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13416 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13418 if (ASSEMBLER_DIALECT == ASM_ATT)
13421 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13422 || GET_CODE (x) == LABEL_REF)
13424 if (ASSEMBLER_DIALECT == ASM_ATT)
13427 fputs ("OFFSET FLAT:", file);
13430 if (CONST_INT_P (x))
13431 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13433 output_pic_addr_const (file, x, code);
13435 output_addr_const (file, x);
13440 ix86_print_operand_punct_valid_p (unsigned char code)
13442 return (code == '@' || code == '*' || code == '+'
13443 || code == '&' || code == ';');
13446 /* Print a memory operand whose address is ADDR. */
13449 ix86_print_operand_address (FILE *file, rtx addr)
13451 struct ix86_address parts;
13452 rtx base, index, disp;
13454 int ok = ix86_decompose_address (addr, &parts);
13459 index = parts.index;
13461 scale = parts.scale;
13469 if (ASSEMBLER_DIALECT == ASM_ATT)
13471 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13474 gcc_unreachable ();
13477 /* Use one-byte-shorter RIP-relative addressing in 64-bit mode.  */
13478 if (TARGET_64BIT && !base && !index)
13482 if (GET_CODE (disp) == CONST
13483 && GET_CODE (XEXP (disp, 0)) == PLUS
13484 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13485 symbol = XEXP (XEXP (disp, 0), 0);
13487 if (GET_CODE (symbol) == LABEL_REF
13488 || (GET_CODE (symbol) == SYMBOL_REF
13489 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13492 if (!base && !index)
13494 /* A displacement-only address requires special attention.  */
13496 if (CONST_INT_P (disp))
13498 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13499 fputs ("ds:", file);
13500 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13503 output_pic_addr_const (file, disp, 0);
13505 output_addr_const (file, disp);
13509 if (ASSEMBLER_DIALECT == ASM_ATT)
13514 output_pic_addr_const (file, disp, 0);
13515 else if (GET_CODE (disp) == LABEL_REF)
13516 output_asm_label (disp);
13518 output_addr_const (file, disp);
13523 print_reg (base, 0, file);
13527 print_reg (index, 0, file);
13529 fprintf (file, ",%d", scale);
13535 rtx offset = NULL_RTX;
13539 /* Pull out the offset of a symbol; print any symbol itself. */
13540 if (GET_CODE (disp) == CONST
13541 && GET_CODE (XEXP (disp, 0)) == PLUS
13542 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13544 offset = XEXP (XEXP (disp, 0), 1);
13545 disp = gen_rtx_CONST (VOIDmode,
13546 XEXP (XEXP (disp, 0), 0));
13550 output_pic_addr_const (file, disp, 0);
13551 else if (GET_CODE (disp) == LABEL_REF)
13552 output_asm_label (disp);
13553 else if (CONST_INT_P (disp))
13556 output_addr_const (file, disp);
13562 print_reg (base, 0, file);
13565 if (INTVAL (offset) >= 0)
13567 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13571 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13578 print_reg (index, 0, file);
13580 fprintf (file, "*%d", scale);
13587 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13590 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13594 if (GET_CODE (x) != UNSPEC)
13597 op = XVECEXP (x, 0, 0);
13598 switch (XINT (x, 1))
13600 case UNSPEC_GOTTPOFF:
13601 output_addr_const (file, op);
13602 /* FIXME: This might be @TPOFF in Sun ld. */
13603 fputs ("@gottpoff", file);
13606 output_addr_const (file, op);
13607 fputs ("@tpoff", file);
13609 case UNSPEC_NTPOFF:
13610 output_addr_const (file, op);
13612 fputs ("@tpoff", file);
13614 fputs ("@ntpoff", file);
13616 case UNSPEC_DTPOFF:
13617 output_addr_const (file, op);
13618 fputs ("@dtpoff", file);
13620 case UNSPEC_GOTNTPOFF:
13621 output_addr_const (file, op);
13623 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13624 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13626 fputs ("@gotntpoff", file);
13628 case UNSPEC_INDNTPOFF:
13629 output_addr_const (file, op);
13630 fputs ("@indntpoff", file);
13633 case UNSPEC_MACHOPIC_OFFSET:
13634 output_addr_const (file, op);
13636 machopic_output_function_base_name (file);
13640 case UNSPEC_STACK_CHECK:
13644 gcc_assert (flag_split_stack);
13646 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13647 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13649 gcc_unreachable ();
13652 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13663 /* Split one or more double-mode RTL references into pairs of half-mode
13664 references. The RTL can be REG, offsettable MEM, integer constant, or
13665 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13666 split and "num" is its length. lo_half and hi_half are output arrays
13667 that parallel "operands". */
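/* For example (an illustrative sketch, not from the original source):
   on a 32-bit target a DImode register splits into two SImode subregs,
   while (mem:DI (reg:SI R)) splits into (mem:SI (reg:SI R)) and
   (mem:SI (plus:SI (reg:SI R) (const_int 4))).  */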
13670 split_double_mode (enum machine_mode mode, rtx operands[],
13671 int num, rtx lo_half[], rtx hi_half[])
13673 enum machine_mode half_mode;
13679 half_mode = DImode;
13682 half_mode = SImode;
13685 gcc_unreachable ();
13688 byte = GET_MODE_SIZE (half_mode);
13692 rtx op = operands[num];
13694 /* simplify_subreg refuses to split volatile memory addresses,
13695 but we still have to handle them.  */
13698 lo_half[num] = adjust_address (op, half_mode, 0);
13699 hi_half[num] = adjust_address (op, half_mode, byte);
13703 lo_half[num] = simplify_gen_subreg (half_mode, op,
13704 GET_MODE (op) == VOIDmode
13705 ? mode : GET_MODE (op), 0);
13706 hi_half[num] = simplify_gen_subreg (half_mode, op,
13707 GET_MODE (op) == VOIDmode
13708 ? mode : GET_MODE (op), byte);
13713 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13714 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13715 is the expression of the binary operation. The output may either be
13716 emitted here, or returned to the caller, like all output_* functions.
13718 There is no guarantee that the operands are the same mode, as they
13719 might be within FLOAT or FLOAT_EXTEND expressions. */
13721 #ifndef SYSV386_COMPAT
13722 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13723 wants to fix the assemblers because that causes incompatibility
13724 with gcc. No-one wants to fix gcc because that causes
13725 incompatibility with assemblers...  You can use the option
13726 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13727 #define SYSV386_COMPAT 1
13731 output_387_binary_op (rtx insn, rtx *operands)
13733 static char buf[40];
13736 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13738 #ifdef ENABLE_CHECKING
13739 /* Even if we do not want to check the inputs, this documents the input
13740 constraints, which helps in understanding the following code.  */
13741 if (STACK_REG_P (operands[0])
13742 && ((REG_P (operands[1])
13743 && REGNO (operands[0]) == REGNO (operands[1])
13744 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13745 || (REG_P (operands[2])
13746 && REGNO (operands[0]) == REGNO (operands[2])
13747 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13748 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13751 gcc_assert (is_sse);
13754 switch (GET_CODE (operands[3]))
13757 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13758 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13766 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13767 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13775 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13776 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13784 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13785 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13793 gcc_unreachable ();
13800 strcpy (buf, ssep);
13801 if (GET_MODE (operands[0]) == SFmode)
13802 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13804 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13808 strcpy (buf, ssep + 1);
13809 if (GET_MODE (operands[0]) == SFmode)
13810 strcat (buf, "ss\t{%2, %0|%0, %2}");
13812 strcat (buf, "sd\t{%2, %0|%0, %2}");
13818 switch (GET_CODE (operands[3]))
13822 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13824 rtx temp = operands[2];
13825 operands[2] = operands[1];
13826 operands[1] = temp;
13829 /* We now know operands[0] == operands[1].  */
13831 if (MEM_P (operands[2]))
13837 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13839 if (STACK_TOP_P (operands[0]))
13840 /* How is it that we are storing to a dead operand[2]?
13841 Well, presumably operands[1] is dead too. We can't
13842 store the result to st(0) as st(0) gets popped on this
13843 instruction. Instead store to operands[2] (which I
13844 think has to be st(1)). st(1) will be popped later.
13845 gcc <= 2.8.1 didn't have this check and generated
13846 assembly code that the Unixware assembler rejected. */
13847 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13849 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13853 if (STACK_TOP_P (operands[0]))
13854 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13856 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13861 if (MEM_P (operands[1]))
13867 if (MEM_P (operands[2]))
13873 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13876 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13877 derived assemblers, confusingly reverse the direction of
13878 the operation for fsub{r} and fdiv{r} when the
13879 destination register is not st(0). The Intel assembler
13880 doesn't have this brain damage. Read !SYSV386_COMPAT to
13881 figure out what the hardware really does. */
13882 if (STACK_TOP_P (operands[0]))
13883 p = "{p\t%0, %2|rp\t%2, %0}";
13885 p = "{rp\t%2, %0|p\t%0, %2}";
13887 if (STACK_TOP_P (operands[0]))
13888 /* As above for fmul/fadd, we can't store to st(0). */
13889 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13891 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13896 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13899 if (STACK_TOP_P (operands[0]))
13900 p = "{rp\t%0, %1|p\t%1, %0}";
13902 p = "{p\t%1, %0|rp\t%0, %1}";
13904 if (STACK_TOP_P (operands[0]))
13905 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13907 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13912 if (STACK_TOP_P (operands[0]))
13914 if (STACK_TOP_P (operands[1]))
13915 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13917 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13920 else if (STACK_TOP_P (operands[1]))
13923 p = "{\t%1, %0|r\t%0, %1}";
13925 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13931 p = "{r\t%2, %0|\t%0, %2}";
13933 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13939 gcc_unreachable ();
13946 /* Return needed mode for entity in optimize_mode_switching pass. */
13949 ix86_mode_needed (int entity, rtx insn)
13951 enum attr_i387_cw mode;
13953 /* The mode UNINITIALIZED is used to store the control word after a
13954 function call or ASM pattern.  The mode ANY specifies that the function
13955 has no requirements on the control word and makes no changes to the
13956 bits we are interested in.  */
13959 || (NONJUMP_INSN_P (insn)
13960 && (asm_noperands (PATTERN (insn)) >= 0
13961 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13962 return I387_CW_UNINITIALIZED;
13964 if (recog_memoized (insn) < 0)
13965 return I387_CW_ANY;
13967 mode = get_attr_i387_cw (insn);
13972 if (mode == I387_CW_TRUNC)
13977 if (mode == I387_CW_FLOOR)
13982 if (mode == I387_CW_CEIL)
13987 if (mode == I387_CW_MASK_PM)
13992 gcc_unreachable ();
13995 return I387_CW_ANY;
13998 /* Output code to initialize control word copies used by trunc?f?i and
13999 rounding patterns.  CURRENT_MODE is set to the current control word,
14000 while NEW_MODE is set to the new control word.  */
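/* For reference (a sketch, based on the x87 architecture rather than
   this file): bits 11:10 of the control word select the rounding mode
   -- 00 = to nearest, 01 = down, 10 = up, 11 = toward zero -- and
   bit 5 (0x0020) masks the precision exception.  Hence the 0x0c00,
   0x0400, 0x0800 and 0x0020 constants used below.  */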
14003 emit_i387_cw_initialization (int mode)
14005 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14008 enum ix86_stack_slot slot;
14010 rtx reg = gen_reg_rtx (HImode);
14012 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14013 emit_move_insn (reg, copy_rtx (stored_mode));
14015 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14016 || optimize_function_for_size_p (cfun))
14020 case I387_CW_TRUNC:
14021 /* round toward zero (truncate) */
14022 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14023 slot = SLOT_CW_TRUNC;
14026 case I387_CW_FLOOR:
14027 /* round down toward -oo */
14028 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14029 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14030 slot = SLOT_CW_FLOOR;
14034 /* round up toward +oo */
14035 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14036 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14037 slot = SLOT_CW_CEIL;
14040 case I387_CW_MASK_PM:
14041 /* mask precision exception for nearbyint() */
14042 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14043 slot = SLOT_CW_MASK_PM;
14047 gcc_unreachable ();
14054 case I387_CW_TRUNC:
14055 /* round toward zero (truncate) */
14056 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14057 slot = SLOT_CW_TRUNC;
14060 case I387_CW_FLOOR:
14061 /* round down toward -oo */
14062 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14063 slot = SLOT_CW_FLOOR;
14067 /* round up toward +oo */
14068 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14069 slot = SLOT_CW_CEIL;
14072 case I387_CW_MASK_PM:
14073 /* mask precision exception for nearbyint() */
14074 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14075 slot = SLOT_CW_MASK_PM;
14079 gcc_unreachable ();
14083 gcc_assert (slot < MAX_386_STACK_LOCALS);
14085 new_mode = assign_386_stack_local (HImode, slot);
14086 emit_move_insn (new_mode, reg);
14089 /* Output code for INSN to convert a float to a signed int. OPERANDS
14090 are the insn operands. The output may be [HSD]Imode and the input
14091 operand may be [SDX]Fmode. */
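/* For illustration, the non-fisttp path below emits roughly:

     fldcw  new_cw     ; switch to the truncating control word
     fistp  dest       ; convert and store, popping %st(0)
     fldcw  old_cw     ; restore the caller's control word

   (a sketch; the popping vs. non-popping variant depends on whether
   the stack top dies).  */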
14094 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14096 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14097 int dimode_p = GET_MODE (operands[0]) == DImode;
14098 int round_mode = get_attr_i387_cw (insn);
14100 /* Jump through a hoop or two for DImode, since the hardware has no
14101 non-popping instruction. We used to do this a different way, but
14102 that was somewhat fragile and broke with post-reload splitters. */
14103 if ((dimode_p || fisttp) && !stack_top_dies)
14104 output_asm_insn ("fld\t%y1", operands);
14106 gcc_assert (STACK_TOP_P (operands[1]));
14107 gcc_assert (MEM_P (operands[0]));
14108 gcc_assert (GET_MODE (operands[1]) != TFmode);
14111 output_asm_insn ("fisttp%Z0\t%0", operands);
14114 if (round_mode != I387_CW_ANY)
14115 output_asm_insn ("fldcw\t%3", operands);
14116 if (stack_top_dies || dimode_p)
14117 output_asm_insn ("fistp%Z0\t%0", operands);
14119 output_asm_insn ("fist%Z0\t%0", operands);
14120 if (round_mode != I387_CW_ANY)
14121 output_asm_insn ("fldcw\t%2", operands);
14127 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14128 have the values zero or one, indicates the ffreep insn's operand
14129 from the OPERANDS array. */
14131 static const char *
14132 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14134 if (TARGET_USE_FFREEP)
14135 #ifdef HAVE_AS_IX86_FFREEP
14136 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14139 static char retval[32];
14140 int regno = REGNO (operands[opno]);
14142 gcc_assert (FP_REGNO_P (regno));
14144 regno -= FIRST_STACK_REG;
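      /* A note on the trick below: ASM_SHORT stores the 16-bit value
         0xc0df + 0x100 * regno little-endian, i.e. the bytes 0xdf and
         0xc0 + regno -- the encoding of "ffreep %st(regno)" -- for
         assemblers that lack the mnemonic.  */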
14146 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14151 return opno ? "fstp\t%y1" : "fstp\t%y0";
14155 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14156 should be used. UNORDERED_P is true when fucom should be used. */
14159 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14161 int stack_top_dies;
14162 rtx cmp_op0, cmp_op1;
14163 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14167 cmp_op0 = operands[0];
14168 cmp_op1 = operands[1];
14172 cmp_op0 = operands[1];
14173 cmp_op1 = operands[2];
14178 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14179 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14180 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14181 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14183 if (GET_MODE (operands[0]) == SFmode)
14185 return &ucomiss[TARGET_AVX ? 0 : 1];
14187 return &comiss[TARGET_AVX ? 0 : 1];
14190 return &ucomisd[TARGET_AVX ? 0 : 1];
14192 return &comisd[TARGET_AVX ? 0 : 1];
14195 gcc_assert (STACK_TOP_P (cmp_op0));
14197 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14199 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14201 if (stack_top_dies)
14203 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14204 return output_387_ffreep (operands, 1);
14207 return "ftst\n\tfnstsw\t%0";
14210 if (STACK_REG_P (cmp_op1)
14212 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14213 && REGNO (cmp_op1) != FIRST_STACK_REG)
14215 /* If the top of the 387 stack dies, and the other operand
14216 is also a stack register that dies, then this must be an
14217 `fcompp' float compare.  */
14221 /* There is no double-popping fcomi variant.  Fortunately,
14222 eflags is immune to the fstp's cc clobbering.  */
14224 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14226 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14227 return output_387_ffreep (operands, 0);
14232 return "fucompp\n\tfnstsw\t%0";
14234 return "fcompp\n\tfnstsw\t%0";
14239 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14241 static const char * const alt[16] =
14243 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14244 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14245 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14246 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14248 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14249 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14253 "fcomi\t{%y1, %0|%0, %y1}",
14254 "fcomip\t{%y1, %0|%0, %y1}",
14255 "fucomi\t{%y1, %0|%0, %y1}",
14256 "fucomip\t{%y1, %0|%0, %y1}",
14267 mask = eflags_p << 3;
14268 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14269 mask |= unordered_p << 1;
14270 mask |= stack_top_dies;
14272 gcc_assert (mask < 16);
14281 ix86_output_addr_vec_elt (FILE *file, int value)
14283 const char *directive = ASM_LONG;
14287 directive = ASM_QUAD;
14289 gcc_assert (!TARGET_64BIT);
14292 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
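/* On a typical ELF target this emits, e.g., ".long .L42" -- or a
   ".quad" entry when the quad directive was selected above -- as one
   absolute jump-table entry (a sketch; ASM_LONG, ASM_QUAD and LPREFIX
   are target-specific).  */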
14296 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14298 const char *directive = ASM_LONG;
14301 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14302 directive = ASM_QUAD;
14304 gcc_assert (!TARGET_64BIT);
14306 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14307 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14308 fprintf (file, "%s%s%d-%s%d\n",
14309 directive, LPREFIX, value, LPREFIX, rel);
14310 else if (HAVE_AS_GOTOFF_IN_DATA)
14311 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14313 else if (TARGET_MACHO)
14315 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14316 machopic_output_function_base_name (file);
14321 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14322 GOT_SYMBOL_NAME, LPREFIX, value);
14325 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate.  */
14329 ix86_expand_clear (rtx dest)
14333 /* We play register width games, which are only valid after reload. */
14334 gcc_assert (reload_completed);
14336 /* Avoid HImode and its attendant prefix byte. */
14337 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14338 dest = gen_rtx_REG (SImode, REGNO (dest));
14339 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14341 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14342 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14344 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14345 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14351 /* X is an unchanging MEM. If it is a constant pool reference, return
14352 the constant pool rtx, else NULL. */
14355 maybe_get_pool_constant (rtx x)
14357 x = ix86_delegitimize_address (XEXP (x, 0));
14359 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14360 return get_pool_constant (x);
14366 ix86_expand_move (enum machine_mode mode, rtx operands[])
14369 enum tls_model model;
14374 if (GET_CODE (op1) == SYMBOL_REF)
14376 model = SYMBOL_REF_TLS_MODEL (op1);
14379 op1 = legitimize_tls_address (op1, model, true);
14380 op1 = force_operand (op1, op0);
14384 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14385 && SYMBOL_REF_DLLIMPORT_P (op1))
14386 op1 = legitimize_dllimport_symbol (op1, false);
14388 else if (GET_CODE (op1) == CONST
14389 && GET_CODE (XEXP (op1, 0)) == PLUS
14390 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14392 rtx addend = XEXP (XEXP (op1, 0), 1);
14393 rtx symbol = XEXP (XEXP (op1, 0), 0);
14396 model = SYMBOL_REF_TLS_MODEL (symbol);
14398 tmp = legitimize_tls_address (symbol, model, true);
14399 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14400 && SYMBOL_REF_DLLIMPORT_P (symbol))
14401 tmp = legitimize_dllimport_symbol (symbol, true);
14405 tmp = force_operand (tmp, NULL);
14406 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14407 op0, 1, OPTAB_DIRECT);
14413 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14415 if (TARGET_MACHO && !TARGET_64BIT)
14420 rtx temp = ((reload_in_progress
14421 || ((op0 && REG_P (op0))
14423 ? op0 : gen_reg_rtx (Pmode));
14424 op1 = machopic_indirect_data_reference (op1, temp);
14425 op1 = machopic_legitimize_pic_address (op1, mode,
14426 temp == op1 ? 0 : temp);
14428 else if (MACHOPIC_INDIRECT)
14429 op1 = machopic_indirect_data_reference (op1, 0);
14437 op1 = force_reg (Pmode, op1);
14438 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14440 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14441 op1 = legitimize_pic_address (op1, reg);
14450 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14451 || !push_operand (op0, mode))
14453 op1 = force_reg (mode, op1);
14455 if (push_operand (op0, mode)
14456 && ! general_no_elim_operand (op1, mode))
14457 op1 = copy_to_mode_reg (mode, op1);
14459 /* Force large constants in 64-bit compilation into a register
14460 to get them CSEd.  */
14461 if (can_create_pseudo_p ()
14462 && (mode == DImode) && TARGET_64BIT
14463 && immediate_operand (op1, mode)
14464 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14465 && !register_operand (op0, mode)
14467 op1 = copy_to_mode_reg (mode, op1);
14469 if (can_create_pseudo_p ()
14470 && FLOAT_MODE_P (mode)
14471 && GET_CODE (op1) == CONST_DOUBLE)
14473 /* If we are loading a floating point constant to a register,
14474 force the value to memory now, since we'll get better code
14475 out of the back end.  */
14477 op1 = validize_mem (force_const_mem (mode, op1));
14478 if (!register_operand (op0, mode))
14480 rtx temp = gen_reg_rtx (mode);
14481 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14482 emit_move_insn (op0, temp);
14488 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14492 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14494 rtx op0 = operands[0], op1 = operands[1];
14495 unsigned int align = GET_MODE_ALIGNMENT (mode);
14497 /* Force constants other than zero into memory.  We do not know how
14498 the instructions used to build constants modify the upper 64 bits
14499 of the register; once we have that information, we may be able
14500 to handle some of them more efficiently.  */
14501 if (can_create_pseudo_p ()
14502 && register_operand (op0, mode)
14503 && (CONSTANT_P (op1)
14504 || (GET_CODE (op1) == SUBREG
14505 && CONSTANT_P (SUBREG_REG (op1))))
14506 && !standard_sse_constant_p (op1))
14507 op1 = validize_mem (force_const_mem (mode, op1));
14509 /* We need to check memory alignment for SSE mode since attributes
14510 can make operands unaligned.  */
14511 if (can_create_pseudo_p ()
14512 && SSE_REG_MODE_P (mode)
14513 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14514 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14518 /* ix86_expand_vector_move_misalign() does not like constants ... */
14519 if (CONSTANT_P (op1)
14520 || (GET_CODE (op1) == SUBREG
14521 && CONSTANT_P (SUBREG_REG (op1))))
14522 op1 = validize_mem (force_const_mem (mode, op1));
14524 /* ... nor both arguments in memory. */
14525 if (!register_operand (op0, mode)
14526 && !register_operand (op1, mode))
14527 op1 = force_reg (mode, op1);
14529 tmp[0] = op0; tmp[1] = op1;
14530 ix86_expand_vector_move_misalign (mode, tmp);
14534 /* Make operand1 a register if it isn't already. */
14535 if (can_create_pseudo_p ()
14536 && !register_operand (op0, mode)
14537 && !register_operand (op1, mode))
14539 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14543 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14546 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14547 straight to ix86_expand_vector_move. */
14548 /* Code generation for scalar reg-reg moves of single and double precision data:
14549 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14553 if (x86_sse_partial_reg_dependency == true)
14558 Code generation for scalar loads of double precision data:
14559 if (x86_sse_split_regs == true)
14560 movlpd mem, reg (gas syntax)
14564 Code generation for unaligned packed loads of single precision data
14565 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14566 if (x86_sse_unaligned_move_optimal)
14569 if (x86_sse_partial_reg_dependency == true)
14581 Code generation for unaligned packed loads of double precision data
14582 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14583 if (x86_sse_unaligned_move_optimal)
14586 if (x86_sse_split_regs == true)
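/* A sketch of the last variant for an unaligned V4SF load: the
   register is first zeroed (partial-reg-dependency chips) or simply
   clobbered (split-regs chips), then loaded half at a time:

     xorps %xmm0, %xmm0
     movlps mem, %xmm0
     movhps mem+8, %xmm0  */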
14599 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14608 switch (GET_MODE_CLASS (mode))
14610 case MODE_VECTOR_INT:
14612 switch (GET_MODE_SIZE (mode))
14615 /* If we're optimizing for size, movups is the smallest. */
14616 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14618 op0 = gen_lowpart (V4SFmode, op0);
14619 op1 = gen_lowpart (V4SFmode, op1);
14620 emit_insn (gen_avx_movups (op0, op1));
14623 op0 = gen_lowpart (V16QImode, op0);
14624 op1 = gen_lowpart (V16QImode, op1);
14625 emit_insn (gen_avx_movdqu (op0, op1));
14628 op0 = gen_lowpart (V32QImode, op0);
14629 op1 = gen_lowpart (V32QImode, op1);
14630 emit_insn (gen_avx_movdqu256 (op0, op1));
14633 gcc_unreachable ();
14636 case MODE_VECTOR_FLOAT:
14637 op0 = gen_lowpart (mode, op0);
14638 op1 = gen_lowpart (mode, op1);
14643 emit_insn (gen_avx_movups (op0, op1));
14646 emit_insn (gen_avx_movups256 (op0, op1));
14649 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14651 op0 = gen_lowpart (V4SFmode, op0);
14652 op1 = gen_lowpart (V4SFmode, op1);
14653 emit_insn (gen_avx_movups (op0, op1));
14656 emit_insn (gen_avx_movupd (op0, op1));
14659 emit_insn (gen_avx_movupd256 (op0, op1));
14662 gcc_unreachable ();
14667 gcc_unreachable ();
14675 /* If we're optimizing for size, movups is the smallest. */
14676 if (optimize_insn_for_size_p ()
14677 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14679 op0 = gen_lowpart (V4SFmode, op0);
14680 op1 = gen_lowpart (V4SFmode, op1);
14681 emit_insn (gen_sse_movups (op0, op1));
14685 /* ??? If we have typed data, then it would appear that using
14686 movdqu is the only way to get unaligned data loaded with integer type.  */
14688 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14690 op0 = gen_lowpart (V16QImode, op0);
14691 op1 = gen_lowpart (V16QImode, op1);
14692 emit_insn (gen_sse2_movdqu (op0, op1));
14696 if (TARGET_SSE2 && mode == V2DFmode)
14700 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14702 op0 = gen_lowpart (V2DFmode, op0);
14703 op1 = gen_lowpart (V2DFmode, op1);
14704 emit_insn (gen_sse2_movupd (op0, op1));
14708 /* When SSE registers are split into halves, we can avoid
14709 writing to the top half twice. */
14710 if (TARGET_SSE_SPLIT_REGS)
14712 emit_clobber (op0);
14717 /* ??? Not sure about the best option for the Intel chips.
14718 The following would seem to satisfy; the register is
14719 entirely cleared, breaking the dependency chain. We
14720 then store to the upper half, with a dependency depth
14721 of one. A rumor has it that Intel recommends two movsd
14722 followed by an unpacklpd, but this is unconfirmed. And
14723 given that the dependency depth of the unpacklpd would
14724 still be one, I'm not sure why this would be better. */
14725 zero = CONST0_RTX (V2DFmode);
14728 m = adjust_address (op1, DFmode, 0);
14729 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14730 m = adjust_address (op1, DFmode, 8);
14731 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14735 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14737 op0 = gen_lowpart (V4SFmode, op0);
14738 op1 = gen_lowpart (V4SFmode, op1);
14739 emit_insn (gen_sse_movups (op0, op1));
14743 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14744 emit_move_insn (op0, CONST0_RTX (mode));
14746 emit_clobber (op0);
14748 if (mode != V4SFmode)
14749 op0 = gen_lowpart (V4SFmode, op0);
14750 m = adjust_address (op1, V2SFmode, 0);
14751 emit_insn (gen_sse_loadlps (op0, op0, m));
14752 m = adjust_address (op1, V2SFmode, 8);
14753 emit_insn (gen_sse_loadhps (op0, op0, m));
14756 else if (MEM_P (op0))
14758 /* If we're optimizing for size, movups is the smallest. */
14759 if (optimize_insn_for_size_p ()
14760 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14762 op0 = gen_lowpart (V4SFmode, op0);
14763 op1 = gen_lowpart (V4SFmode, op1);
14764 emit_insn (gen_sse_movups (op0, op1));
14768 /* ??? Similar to above, only less clear because of quote
14769 typeless stores unquote. */
14770 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14771 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14773 op0 = gen_lowpart (V16QImode, op0);
14774 op1 = gen_lowpart (V16QImode, op1);
14775 emit_insn (gen_sse2_movdqu (op0, op1));
14779 if (TARGET_SSE2 && mode == V2DFmode)
14781 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14783 op0 = gen_lowpart (V2DFmode, op0);
14784 op1 = gen_lowpart (V2DFmode, op1);
14785 emit_insn (gen_sse2_movupd (op0, op1));
14789 m = adjust_address (op0, DFmode, 0);
14790 emit_insn (gen_sse2_storelpd (m, op1));
14791 m = adjust_address (op0, DFmode, 8);
14792 emit_insn (gen_sse2_storehpd (m, op1));
14797 if (mode != V4SFmode)
14798 op1 = gen_lowpart (V4SFmode, op1);
14800 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14802 op0 = gen_lowpart (V4SFmode, op0);
14803 emit_insn (gen_sse_movups (op0, op1));
14807 m = adjust_address (op0, V2SFmode, 0);
14808 emit_insn (gen_sse_storelps (m, op1));
14809 m = adjust_address (op0, V2SFmode, 8);
14810 emit_insn (gen_sse_storehps (m, op1));
14815 gcc_unreachable ();
14818 /* Expand a push in MODE. This is some mode for which we do not support
14819 proper push instructions, at least from the registers that we expect
14820 the value to live in. */
14823 ix86_expand_push (enum machine_mode mode, rtx x)
14827 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14828 GEN_INT (-GET_MODE_SIZE (mode)),
14829 stack_pointer_rtx, 1, OPTAB_DIRECT);
14830 if (tmp != stack_pointer_rtx)
14831 emit_move_insn (stack_pointer_rtx, tmp);
14833 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14835 /* When we push an operand onto the stack, it has to be aligned at least
14836 at the function argument boundary.  However, since we don't have
14837 the argument type, we can't determine the actual argument
14839 emit_move_insn (tmp, x);
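/* E.g. a DFmode push expands to roughly

     sub $8, %esp
     mov <value>, (%esp)

   rather than a single push instruction (a sketch).  */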
14842 /* Helper function of ix86_fixup_binary_operands to canonicalize
14843 operand order. Returns true if the operands should be swapped. */
14846 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14849 rtx dst = operands[0];
14850 rtx src1 = operands[1];
14851 rtx src2 = operands[2];
14853 /* If the operation is not commutative, we can't do anything. */
14854 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14857 /* Highest priority is that src1 should match dst. */
14858 if (rtx_equal_p (dst, src1))
14860 if (rtx_equal_p (dst, src2))
14863 /* Next highest priority is that immediate constants come second. */
14864 if (immediate_operand (src2, mode))
14866 if (immediate_operand (src1, mode))
14869 /* Lowest priority is that memory references should come second. */
14879 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14880 destination to use for the operation. If different from the true
14881 destination in operands[0], a copy operation will be required. */
14884 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14887 rtx dst = operands[0];
14888 rtx src1 = operands[1];
14889 rtx src2 = operands[2];
14891 /* Canonicalize operand order. */
14892 if (ix86_swap_binary_operands_p (code, mode, operands))
14896 /* It is invalid to swap operands of different modes. */
14897 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14904 /* Both source operands cannot be in memory. */
14905 if (MEM_P (src1) && MEM_P (src2))
14907 /* Optimization: Only read from memory once. */
14908 if (rtx_equal_p (src1, src2))
14910 src2 = force_reg (mode, src2);
14914 src2 = force_reg (mode, src2);
14917 /* If the destination is memory, and we do not have matching source
14918 operands, do things in registers. */
14919 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14920 dst = gen_reg_rtx (mode);
14922 /* Source 1 cannot be a constant. */
14923 if (CONSTANT_P (src1))
14924 src1 = force_reg (mode, src1);
14926 /* Source 1 cannot be a non-matching memory. */
14927 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14928 src1 = force_reg (mode, src1);
14930 operands[1] = src1;
14931 operands[2] = src2;
14935 /* Similarly, but assume that the destination has already been
14936 set up properly. */
14939 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14940 enum machine_mode mode, rtx operands[])
14942 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14943 gcc_assert (dst == operands[0]);
14946 /* Attempt to expand a binary operator.  Make the expansion closer to the
14947 actual machine than just general_operand, which would allow 3 separate
14948 memory references (one output, two input) in a single insn.  */
14951 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14954 rtx src1, src2, dst, op, clob;
14956 dst = ix86_fixup_binary_operands (code, mode, operands);
14957 src1 = operands[1];
14958 src2 = operands[2];
14960 /* Emit the instruction. */
14962 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14963 if (reload_in_progress)
14965 /* Reload doesn't know about the flags register, and doesn't know that
14966 it doesn't want to clobber it. We can only do this with PLUS. */
14967 gcc_assert (code == PLUS);
14972 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14973 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14976 /* Fix up the destination if needed. */
14977 if (dst != operands[0])
14978 emit_move_insn (operands[0], dst);
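/* A typical caller is a machine-description expander, e.g. (a sketch):

     ix86_expand_binary_operator (PLUS, SImode, operands);
     DONE;  */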
14981 /* Return TRUE or FALSE depending on whether the binary operator meets the
14982 appropriate constraints. */
14985 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14988 rtx dst = operands[0];
14989 rtx src1 = operands[1];
14990 rtx src2 = operands[2];
14992 /* Both source operands cannot be in memory. */
14993 if (MEM_P (src1) && MEM_P (src2))
14996 /* Canonicalize operand order for commutative operators. */
14997 if (ix86_swap_binary_operands_p (code, mode, operands))
15004 /* If the destination is memory, we must have a matching source operand. */
15005 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15008 /* Source 1 cannot be a constant. */
15009 if (CONSTANT_P (src1))
15012 /* Source 1 cannot be a non-matching memory. */
15013 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15019 /* Attempt to expand a unary operator.  Make the expansion closer to the
15020 actual machine than just general_operand, which would allow 2 separate
15021 memory references (one output, one input) in a single insn.  */
15024 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15027 int matching_memory;
15028 rtx src, dst, op, clob;
15033 /* If the destination is memory, and we do not have matching source
15034 operands, do things in registers. */
15035 matching_memory = 0;
15038 if (rtx_equal_p (dst, src))
15039 matching_memory = 1;
15041 dst = gen_reg_rtx (mode);
15044 /* When the source operand is memory, the destination must match.  */
15045 if (MEM_P (src) && !matching_memory)
15046 src = force_reg (mode, src);
15048 /* Emit the instruction. */
15050 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15051 if (reload_in_progress || code == NOT)
15053 /* Reload doesn't know about the flags register, and doesn't know that
15054 it doesn't want to clobber it. */
15055 gcc_assert (code == NOT);
15060 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15061 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15064 /* Fix up the destination if needed. */
15065 if (dst != operands[0])
15066 emit_move_insn (operands[0], dst);
15069 /* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if dividend and
15070 divisor are within the range [0-255].  */
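/* A sketch of the emitted control flow:

     scratch = op2 | op3
     test $-0x100, scratch    ; any bit above the low 8 set?
     je .Lqimode              ; no: both operands fit in 8 bits
     <full-width signed/unsigned divmod>
     jmp .Lend
   .Lqimode:
     <8-bit unsigned divide; quotient in AL, remainder in AH>
   .Lend:  */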
15073 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15076 rtx end_label, qimode_label;
15077 rtx insn, div, mod;
15078 rtx scratch, tmp0, tmp1, tmp2;
15079 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15080 rtx (*gen_zero_extend) (rtx, rtx);
15081 rtx (*gen_test_ccno_1) (rtx, rtx);
15086 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15087 gen_test_ccno_1 = gen_testsi_ccno_1;
15088 gen_zero_extend = gen_zero_extendqisi2;
15091 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15092 gen_test_ccno_1 = gen_testdi_ccno_1;
15093 gen_zero_extend = gen_zero_extendqidi2;
15096 gcc_unreachable ();
15099 end_label = gen_label_rtx ();
15100 qimode_label = gen_label_rtx ();
15102 scratch = gen_reg_rtx (mode);
15104 /* Use 8-bit unsigned divmod if dividend and divisor are within
15105 the range [0-255].  */
15106 emit_move_insn (scratch, operands[2]);
15107 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15108 scratch, 1, OPTAB_DIRECT);
15109 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15110 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15111 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15112 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15113 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15115 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15116 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15117 JUMP_LABEL (insn) = qimode_label;
15119 /* Generate the original signed/unsigned divmod.  */
15120 div = gen_divmod4_1 (operands[0], operands[1],
15121 operands[2], operands[3]);
15124 /* Branch to the end. */
15125 emit_jump_insn (gen_jump (end_label));
15128 /* Generate 8bit unsigned divide. */
15129 emit_label (qimode_label);
15130 /* Don't use operands[0] for the result of the 8-bit divide since not all
15131 registers support QImode ZERO_EXTRACT.  */
15132 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15133 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15134 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15135 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15139 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15140 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15144 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15145 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15148 /* Extract the remainder from AH. */
15149 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15150 if (REG_P (operands[1]))
15151 insn = emit_move_insn (operands[1], tmp1);
15154 /* Need a new scratch register since the old one has result
15156 scratch = gen_reg_rtx (mode);
15157 emit_move_insn (scratch, tmp1);
15158 insn = emit_move_insn (operands[1], scratch);
15160 set_unique_reg_note (insn, REG_EQUAL, mod);
15162 /* Zero extend quotient from AL. */
15163 tmp1 = gen_lowpart (QImode, tmp0);
15164 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15165 set_unique_reg_note (insn, REG_EQUAL, div);
15167 emit_label (end_label);
15170 #define LEA_SEARCH_THRESHOLD 12
15172 /* Search backward for a non-agu definition of register number REGNO1
15173 or register number REGNO2 in INSN's basic block until we
15174 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15175 2. Reach the BB boundary, or
15176 3. Reach an agu definition.
15177 Returns the distance between the non-agu definition point and INSN.
15178 If there is no definition point, returns -1. */
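/* For example (illustrative insns; this is the situation the search
   looks for):

	add	%ebx, %ecx	<- non-agu definition of %ecx
	...
	add	$4, %ecx	<- INSN, a candidate for LEA conversion

   The count of non-debug insns between the definition and INSN is
   (roughly) the distance returned.  */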
15181 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15184 basic_block bb = BLOCK_FOR_INSN (insn);
15187 enum attr_type insn_type;
15189 if (insn != BB_HEAD (bb))
15191 rtx prev = PREV_INSN (insn);
15192 while (prev && distance < LEA_SEARCH_THRESHOLD)
15194 if (NONDEBUG_INSN_P (prev))
15197 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15198 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15199 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15200 && (regno1 == DF_REF_REGNO (*def_rec)
15201 || regno2 == DF_REF_REGNO (*def_rec)))
15203 insn_type = get_attr_type (prev);
15204 if (insn_type != TYPE_LEA)
15208 if (prev == BB_HEAD (bb))
15210 prev = PREV_INSN (prev);
15214 if (distance < LEA_SEARCH_THRESHOLD)
15218 bool simple_loop = false;
15220 FOR_EACH_EDGE (e, ei, bb->preds)
15223 simple_loop = true;
15229 rtx prev = BB_END (bb);
15232 && distance < LEA_SEARCH_THRESHOLD)
15234 if (NONDEBUG_INSN_P (prev))
15237 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15238 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15239 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15240 && (regno1 == DF_REF_REGNO (*def_rec)
15241 || regno2 == DF_REF_REGNO (*def_rec)))
15243 insn_type = get_attr_type (prev);
15244 if (insn_type != TYPE_LEA)
15248 prev = PREV_INSN (prev);
15256 /* get_attr_type may modify recog data. We want to make sure
15257 that recog data is valid for instruction INSN, on which
15258 distance_non_agu_define is called. INSN is unchanged here. */
15259 extract_insn_cached (insn);
15263 /* Return the distance between INSN and the next insn that uses
15264 register number REGNO0 in a memory address. Return -1 if no such
15265 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15268 distance_agu_use (unsigned int regno0, rtx insn)
15270 basic_block bb = BLOCK_FOR_INSN (insn);
15275 if (insn != BB_END (bb))
15277 rtx next = NEXT_INSN (insn);
15278 while (next && distance < LEA_SEARCH_THRESHOLD)
15280 if (NONDEBUG_INSN_P (next))
15284 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15285 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15286 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15287 && regno0 == DF_REF_REGNO (*use_rec))
15289 /* Return DISTANCE if OP0 is used in a memory
15290 address in NEXT. */
15294 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15295 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15296 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15297 && regno0 == DF_REF_REGNO (*def_rec))
15299 /* Return -1 if OP0 is set in NEXT. */
15303 if (next == BB_END (bb))
15305 next = NEXT_INSN (next);
15309 if (distance < LEA_SEARCH_THRESHOLD)
15313 bool simple_loop = false;
15315 FOR_EACH_EDGE (e, ei, bb->succs)
15318 simple_loop = true;
15324 rtx next = BB_HEAD (bb);
15327 && distance < LEA_SEARCH_THRESHOLD)
15329 if (NONDEBUG_INSN_P (next))
15333 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15334 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15335 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15336 && regno0 == DF_REF_REGNO (*use_rec))
15338 /* Return DISTANCE if OP0 is used in a memory
15339 address in NEXT. */
15343 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15344 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15345 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15346 && regno0 == DF_REF_REGNO (*def_rec))
15348 /* Return -1 if OP0 is set in NEXT. */
15353 next = NEXT_INSN (next);
15361 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15362 there is a choice between LEA and ADD.
15363 Negative value: ADD is preferred over LEA
15365 Positive value: LEA is preferred over ADD */
15366 #define IX86_LEA_PRIORITY 2
15368 /* Return true if it is ok to optimize an ADD operation to a LEA
15369 operation to avoid flag register consumption. For most processors,
15370 ADD is faster than LEA. For processors like ATOM, if the
15371 destination register of the LEA holds an actual address which will
15372 be used soon, LEA is better; otherwise ADD is better. */
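/* Illustratively, on ATOM (register names made up):

	add	%ebx, %ecx		; executes on the ALU, sets flags
   vs.
	lea	(%ecx,%ebx), %ecx	; executes on the AGU, flags untouched

   LEA wins when %ecx feeds a memory address soon afterwards; otherwise
   the plain ADD is better.  A sketch of the trade-off only, not the
   exact code generated.  */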
15375 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15377 unsigned int regno0 = true_regnum (operands[0]);
15378 unsigned int regno1 = true_regnum (operands[1]);
15379 unsigned int regno2 = true_regnum (operands[2]);
15381 /* If a = b + c, (a!=b && a!=c), we must use the lea form. */
15382 if (regno0 != regno1 && regno0 != regno2)
15385 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15389 int dist_define, dist_use;
15391 /* Return false if REGNO0 isn't used in a memory address. */
15392 dist_use = distance_agu_use (regno0, insn);
15396 dist_define = distance_non_agu_define (regno1, regno2, insn);
15397 if (dist_define <= 0)
15400 /* If this insn has both backward non-agu dependence and forward
15401 agu dependence, the one with the shorter distance takes effect. */
15402 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15409 /* Return true if the destination reg of SET_BODY is the shift count of
15413 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15419 /* Retrieve destination of SET_BODY. */
15420 switch (GET_CODE (set_body))
15423 set_dest = SET_DEST (set_body);
15424 if (!set_dest || !REG_P (set_dest))
15428 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15429 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15437 /* Retrieve shift count of USE_BODY. */
15438 switch (GET_CODE (use_body))
15441 shift_rtx = XEXP (use_body, 1);
15444 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15445 if (ix86_dep_by_shift_count_body (set_body,
15446 XVECEXP (use_body, 0, i)))
15454 && (GET_CODE (shift_rtx) == ASHIFT
15455 || GET_CODE (shift_rtx) == LSHIFTRT
15456 || GET_CODE (shift_rtx) == ASHIFTRT
15457 || GET_CODE (shift_rtx) == ROTATE
15458 || GET_CODE (shift_rtx) == ROTATERT))
15460 rtx shift_count = XEXP (shift_rtx, 1);
15462 /* Return true if shift count is dest of SET_BODY. */
15463 if (REG_P (shift_count)
15464 && true_regnum (set_dest) == true_regnum (shift_count))
15471 /* Return true if the destination reg of SET_INSN is the shift count of
15475 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15477 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15478 PATTERN (use_insn));
15481 /* Return TRUE or FALSE depending on whether the unary operator meets the
15482 appropriate constraints. */
15485 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15486 enum machine_mode mode ATTRIBUTE_UNUSED,
15487 rtx operands[2] ATTRIBUTE_UNUSED)
15489 /* If one of the operands is memory, source and destination must match. */
15490 if ((MEM_P (operands[0])
15491 || MEM_P (operands[1]))
15492 && ! rtx_equal_p (operands[0], operands[1]))
15497 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15498 are ok, keeping in mind the possible movddup alternative. */
15501 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15503 if (MEM_P (operands[0]))
15504 return rtx_equal_p (operands[0], operands[1 + high]);
15505 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15506 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15510 /* Post-reload splitter for converting an SF or DFmode value in an
15511 SSE register into an unsigned SImode. */
15514 ix86_split_convert_uns_si_sse (rtx operands[])
15516 enum machine_mode vecmode;
15517 rtx value, large, zero_or_two31, input, two31, x;
15519 large = operands[1];
15520 zero_or_two31 = operands[2];
15521 input = operands[3];
15522 two31 = operands[4];
15523 vecmode = GET_MODE (large);
15524 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15526 /* Load up the value into the low element. We must ensure that the other
15527 elements are valid floats -- zero is the easiest such value. */
15530 if (vecmode == V4SFmode)
15531 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15533 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15537 input = gen_rtx_REG (vecmode, REGNO (input));
15538 emit_move_insn (value, CONST0_RTX (vecmode));
15539 if (vecmode == V4SFmode)
15540 emit_insn (gen_sse_movss (value, value, input));
15542 emit_insn (gen_sse2_movsd (value, value, input));
15545 emit_move_insn (large, two31);
15546 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15548 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15549 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15551 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15552 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15554 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15555 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15557 large = gen_rtx_REG (V4SImode, REGNO (large));
15558 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15560 x = gen_rtx_REG (V4SImode, REGNO (value));
15561 if (vecmode == V4SFmode)
15562 emit_insn (gen_sse2_cvttps2dq (x, value));
15564 emit_insn (gen_sse2_cvttpd2dq (x, value));
15567 emit_insn (gen_xorv4si3 (value, value, large));
15570 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15571 Expects the 64-bit DImode to be supplied in a pair of integral
15572 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15573 -mfpmath=sse, !optimize_size only. */
15576 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15578 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15579 rtx int_xmm, fp_xmm;
15580 rtx biases, exponents;
15583 int_xmm = gen_reg_rtx (V4SImode);
15584 if (TARGET_INTER_UNIT_MOVES)
15585 emit_insn (gen_movdi_to_sse (int_xmm, input));
15586 else if (TARGET_SSE_SPLIT_REGS)
15588 emit_clobber (int_xmm);
15589 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15593 x = gen_reg_rtx (V2DImode);
15594 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15595 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15598 x = gen_rtx_CONST_VECTOR (V4SImode,
15599 gen_rtvec (4, GEN_INT (0x43300000UL),
15600 GEN_INT (0x45300000UL),
15601 const0_rtx, const0_rtx));
15602 exponents = validize_mem (force_const_mem (V4SImode, x));
15604 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15605 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15607 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15608 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15609 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15610 (0x1.0p84 + double(fp_value_hi_xmm)).
15611 Note these exponents differ by 32. */
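/* A worked sketch of the bias trick: for input x = hi * 2^32 + lo,
   the two doubles assembled above are
	0x43300000:lo == 0x1.0p52 + lo
	0x45300000:hi == 0x1.0p84 + hi * 2^32
   both exact.  Subtracting the {0x1.0p52, 0x1.0p84} biases recovers
   lo and hi * 2^32, whose sum (one rounding) is (double) x.  */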
15613 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15615 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15616 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15617 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15618 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15619 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15620 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15621 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15622 biases = validize_mem (force_const_mem (V2DFmode, biases));
15623 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15625 /* Add the upper and lower DFmode values together. */
15627 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15630 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15631 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15632 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15635 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15638 /* Not used, but eases macroization of patterns. */
15640 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15641 rtx input ATTRIBUTE_UNUSED)
15643 gcc_unreachable ();
15646 /* Convert an unsigned SImode value into a DFmode. Currently only used
15647 for SSE, but applicable anywhere. */
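/* Sketch of the idea: compute
	(double) (int) (x + 0x80000000u) + 0x1.0p31
   i.e. wrap the unsigned value into the signed range, do an ordinary
   signed conversion, then add 2^31 back.  */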
15650 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15652 REAL_VALUE_TYPE TWO31r;
15655 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15656 NULL, 1, OPTAB_DIRECT);
15658 fp = gen_reg_rtx (DFmode);
15659 emit_insn (gen_floatsidf2 (fp, x));
15661 real_ldexp (&TWO31r, &dconst1, 31);
15662 x = const_double_from_real_value (TWO31r, DFmode);
15664 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15666 emit_move_insn (target, x);
15669 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15670 32-bit mode; otherwise we have a direct convert instruction. */
15673 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15675 REAL_VALUE_TYPE TWO32r;
15676 rtx fp_lo, fp_hi, x;
15678 fp_lo = gen_reg_rtx (DFmode);
15679 fp_hi = gen_reg_rtx (DFmode);
15681 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15683 real_ldexp (&TWO32r, &dconst1, 32);
15684 x = const_double_from_real_value (TWO32r, DFmode);
15685 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15687 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15689 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15692 emit_move_insn (target, x);
15695 /* Convert an unsigned SImode value into an SFmode, using only SSE.
15696 For x86_32, -mfpmath=sse, !optimize_size only. */
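/* Sketch of the idea: split the input into 16-bit halves and combine
	(float) x == (float) (x >> 16) * 0x1.0p16 + (float) (x & 0xffff)
   where each half is exactly representable in SFmode, so only the
   final addition can round.  */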
15698 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15700 REAL_VALUE_TYPE ONE16r;
15701 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15703 real_ldexp (&ONE16r, &dconst1, 16);
15704 x = const_double_from_real_value (ONE16r, SFmode);
15705 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15706 NULL, 0, OPTAB_DIRECT);
15707 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15708 NULL, 0, OPTAB_DIRECT);
15709 fp_hi = gen_reg_rtx (SFmode);
15710 fp_lo = gen_reg_rtx (SFmode);
15711 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15712 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15713 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15715 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15717 if (!rtx_equal_p (target, fp_hi))
15718 emit_move_insn (target, fp_hi);
15721 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15722 then replicate the value for all elements of the vector
15726 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15733 v = gen_rtvec (4, value, value, value, value);
15734 return gen_rtx_CONST_VECTOR (V4SImode, v);
15738 v = gen_rtvec (2, value, value);
15739 return gen_rtx_CONST_VECTOR (V2DImode, v);
15743 v = gen_rtvec (4, value, value, value, value);
15745 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15746 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15747 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15751 v = gen_rtvec (2, value, value);
15753 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15754 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15757 gcc_unreachable ();
15761 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15762 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15763 for an SSE register. If VECT is true, then replicate the mask for
15764 all elements of the vector register. If INVERT is true, then create
15765 a mask excluding the sign bit. */
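/* E.g. for DFmode the mask element is 0x8000000000000000, or
   0x7fffffffffffffff when INVERT is true (illustrative values; the
   code below assembles them from HOST_WIDE_INT halves).  */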
15768 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15770 enum machine_mode vec_mode, imode;
15771 HOST_WIDE_INT hi, lo;
15776 /* Find the sign bit, sign extended to 2*HWI. */
15782 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15783 lo = 0x80000000, hi = lo < 0;
15789 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15790 if (HOST_BITS_PER_WIDE_INT >= 64)
15791 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15793 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15798 vec_mode = VOIDmode;
15799 if (HOST_BITS_PER_WIDE_INT >= 64)
15802 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15809 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15813 lo = ~lo, hi = ~hi;
15819 mask = immed_double_const (lo, hi, imode);
15821 vec = gen_rtvec (2, v, mask);
15822 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15823 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15830 gcc_unreachable ();
15834 lo = ~lo, hi = ~hi;
15836 /* Force this value into the low part of a fp vector constant. */
15837 mask = immed_double_const (lo, hi, imode);
15838 mask = gen_lowpart (mode, mask);
15840 if (vec_mode == VOIDmode)
15841 return force_reg (mode, mask);
15843 v = ix86_build_const_vector (mode, vect, mask);
15844 return force_reg (vec_mode, v);
15847 /* Generate code for floating point ABS or NEG. */
15850 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15853 rtx mask, set, use, clob, dst, src;
15854 bool use_sse = false;
15855 bool vector_mode = VECTOR_MODE_P (mode);
15856 enum machine_mode elt_mode = mode;
15860 elt_mode = GET_MODE_INNER (mode);
15863 else if (mode == TFmode)
15865 else if (TARGET_SSE_MATH)
15866 use_sse = SSE_FLOAT_MODE_P (mode);
15868 /* NEG and ABS performed with SSE use bitwise mask operations.
15869 Create the appropriate mask now. */
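/* Roughly (mnemonics illustrative):
	NEG:	xorps	signbit_mask, %xmm0
	ABS:	andps	inverted_signbit_mask, %xmm0  */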
15871 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15880 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15881 set = gen_rtx_SET (VOIDmode, dst, set);
15886 set = gen_rtx_fmt_e (code, mode, src);
15887 set = gen_rtx_SET (VOIDmode, dst, set);
15890 use = gen_rtx_USE (VOIDmode, mask);
15891 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15892 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15893 gen_rtvec (3, set, use, clob)));
15900 /* Expand a copysign operation. Special case operand 0 being a constant. */
15903 ix86_expand_copysign (rtx operands[])
15905 enum machine_mode mode;
15906 rtx dest, op0, op1, mask, nmask;
15908 dest = operands[0];
15912 mode = GET_MODE (dest);
15914 if (GET_CODE (op0) == CONST_DOUBLE)
15916 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15918 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15919 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15921 if (mode == SFmode || mode == DFmode)
15923 enum machine_mode vmode;
15925 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15927 if (op0 == CONST0_RTX (mode))
15928 op0 = CONST0_RTX (vmode);
15931 rtx v = ix86_build_const_vector (mode, false, op0);
15933 op0 = force_reg (vmode, v);
15936 else if (op0 != CONST0_RTX (mode))
15937 op0 = force_reg (mode, op0);
15939 mask = ix86_build_signbit_mask (mode, 0, 0);
15941 if (mode == SFmode)
15942 copysign_insn = gen_copysignsf3_const;
15943 else if (mode == DFmode)
15944 copysign_insn = gen_copysigndf3_const;
15946 copysign_insn = gen_copysigntf3_const;
15948 emit_insn (copysign_insn (dest, op0, op1, mask));
15952 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15954 nmask = ix86_build_signbit_mask (mode, 0, 1);
15955 mask = ix86_build_signbit_mask (mode, 0, 0);
15957 if (mode == SFmode)
15958 copysign_insn = gen_copysignsf3_var;
15959 else if (mode == DFmode)
15960 copysign_insn = gen_copysigndf3_var;
15962 copysign_insn = gen_copysigntf3_var;
15964 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15968 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15969 be a constant, and so has already been expanded into a vector constant. */
15972 ix86_split_copysign_const (rtx operands[])
15974 enum machine_mode mode, vmode;
15975 rtx dest, op0, mask, x;
15977 dest = operands[0];
15979 mask = operands[3];
15981 mode = GET_MODE (dest);
15982 vmode = GET_MODE (mask);
15984 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15985 x = gen_rtx_AND (vmode, dest, mask);
15986 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15988 if (op0 != CONST0_RTX (vmode))
15990 x = gen_rtx_IOR (vmode, dest, op0);
15991 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15995 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15996 so we have to do two masks. */
15999 ix86_split_copysign_var (rtx operands[])
16001 enum machine_mode mode, vmode;
16002 rtx dest, scratch, op0, op1, mask, nmask, x;
16004 dest = operands[0];
16005 scratch = operands[1];
16008 nmask = operands[4];
16009 mask = operands[5];
16011 mode = GET_MODE (dest);
16012 vmode = GET_MODE (mask);
16014 if (rtx_equal_p (op0, op1))
16016 /* Shouldn't happen often (it's useless, obviously), but when it does
16017 we'd generate incorrect code if we continue below. */
16018 emit_move_insn (dest, op0);
16022 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16024 gcc_assert (REGNO (op1) == REGNO (scratch));
16026 x = gen_rtx_AND (vmode, scratch, mask);
16027 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16030 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16031 x = gen_rtx_NOT (vmode, dest);
16032 x = gen_rtx_AND (vmode, x, op0);
16033 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16037 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16039 x = gen_rtx_AND (vmode, scratch, mask);
16041 else /* alternative 2,4 */
16043 gcc_assert (REGNO (mask) == REGNO (scratch));
16044 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16045 x = gen_rtx_AND (vmode, scratch, op1);
16047 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16049 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16051 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16052 x = gen_rtx_AND (vmode, dest, nmask);
16054 else /* alternative 3,4 */
16056 gcc_assert (REGNO (nmask) == REGNO (dest));
16058 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16059 x = gen_rtx_AND (vmode, dest, op0);
16061 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16064 x = gen_rtx_IOR (vmode, dest, scratch);
16065 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16068 /* Return TRUE or FALSE depending on whether the first SET in INSN
16069 has source and destination with matching CC modes and whether the
16070 CC mode is at least as constrained as REQ_MODE. */
16073 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16076 enum machine_mode set_mode;
16078 set = PATTERN (insn);
16079 if (GET_CODE (set) == PARALLEL)
16080 set = XVECEXP (set, 0, 0);
16081 gcc_assert (GET_CODE (set) == SET);
16082 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16084 set_mode = GET_MODE (SET_DEST (set));
16088 if (req_mode != CCNOmode
16089 && (req_mode != CCmode
16090 || XEXP (SET_SRC (set), 1) != const0_rtx))
16094 if (req_mode == CCGCmode)
16098 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16102 if (req_mode == CCZmode)
16113 gcc_unreachable ();
16116 return GET_MODE (SET_SRC (set)) == set_mode;
16119 /* Generate insn patterns to do an integer compare of OPERANDS. */
16122 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16124 enum machine_mode cmpmode;
16127 cmpmode = SELECT_CC_MODE (code, op0, op1);
16128 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16130 /* This is very simple, but making the interface the same as in the
16131 FP case makes the rest of the code easier. */
16132 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16133 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16135 /* Return the test that should be put into the flags user, i.e.
16136 the bcc, scc, or cmov instruction. */
16137 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16140 /* Figure out whether to use ordered or unordered fp comparisons.
16141 Return the appropriate mode to use. */
16144 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16146 /* ??? In order to make all comparisons reversible, we do all comparisons
16147 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16148 all forms of trapping and nontrapping comparisons, we can make inequality
16149 comparisons trapping again, since it results in better code when using
16150 FCOM based compares. */
16151 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16155 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16157 enum machine_mode mode = GET_MODE (op0);
16159 if (SCALAR_FLOAT_MODE_P (mode))
16161 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16162 return ix86_fp_compare_mode (code);
16167 /* Only zero flag is needed. */
16168 case EQ: /* ZF=0 */
16169 case NE: /* ZF!=0 */
16171 /* Codes needing carry flag. */
16172 case GEU: /* CF=0 */
16173 case LTU: /* CF=1 */
16174 /* Detect overflow checks. They need just the carry flag. */
16175 if (GET_CODE (op0) == PLUS
16176 && rtx_equal_p (op1, XEXP (op0, 0)))
16180 case GTU: /* CF=0 & ZF=0 */
16181 case LEU: /* CF=1 | ZF=1 */
16182 /* Detect overflow checks. They need just the carry flag. */
16183 if (GET_CODE (op0) == MINUS
16184 && rtx_equal_p (op1, XEXP (op0, 0)))
16188 /* Codes possibly doable only with sign flag when
16189 comparing against zero. */
16190 case GE: /* SF=OF or SF=0 */
16191 case LT: /* SF<>OF or SF=1 */
16192 if (op1 == const0_rtx)
16195 /* For other cases Carry flag is not required. */
16197 /* Codes doable only with sign flag when comparing
16198 against zero, but we miss jump instruction for it
16199 so we need to use relational tests against overflow
16200 that thus needs to be zero. */
16201 case GT: /* ZF=0 & SF=OF */
16202 case LE: /* ZF=1 | SF<>OF */
16203 if (op1 == const0_rtx)
16207 /* The strcmp pattern does (use flags), and combine may ask us for a proper
16212 gcc_unreachable ();
16216 /* Return the fixed registers used for condition codes. */
16219 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16226 /* If two condition code modes are compatible, return a condition code
16227 mode which is compatible with both. Otherwise, return
16230 static enum machine_mode
16231 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16236 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16239 if ((m1 == CCGCmode && m2 == CCGOCmode)
16240 || (m1 == CCGOCmode && m2 == CCGCmode))
16246 gcc_unreachable ();
16276 /* These are only compatible with themselves, which we already
16283 /* Return a comparison we can do that is equivalent to
16284 swap_condition (code), apart possibly from orderedness.
16285 But never change orderedness if TARGET_IEEE_FP, returning
16286 UNKNOWN in that case if necessary. */
16288 static enum rtx_code
16289 ix86_fp_swap_condition (enum rtx_code code)
16293 case GT: /* GTU - CF=0 & ZF=0 */
16294 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16295 case GE: /* GEU - CF=0 */
16296 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16297 case UNLT: /* LTU - CF=1 */
16298 return TARGET_IEEE_FP ? UNKNOWN : GT;
16299 case UNLE: /* LEU - CF=1 | ZF=1 */
16300 return TARGET_IEEE_FP ? UNKNOWN : GE;
16302 return swap_condition (code);
16306 /* Return the cost of comparison CODE using the best strategy for performance.
16307 All of the following functions use the number of instructions as the cost metric.
16308 In the future this should be tweaked to compute bytes for optimize_size and
16309 take into account the performance of various instructions on various CPUs. */
16312 ix86_fp_comparison_cost (enum rtx_code code)
16316 /* The cost of code using bit-twiddling on %ah. */
16333 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16337 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16340 gcc_unreachable ();
16343 switch (ix86_fp_comparison_strategy (code))
16345 case IX86_FPCMP_COMI:
16346 return arith_cost > 4 ? 3 : 2;
16347 case IX86_FPCMP_SAHF:
16348 return arith_cost > 4 ? 4 : 3;
16354 /* Return the strategy to use for floating-point compares. We assume that fcomi
16355 is always preferable where available, since that is also true when looking at size
16356 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
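/* Illustrative sequences for the three strategies (operands omitted):

	COMI:	fcomi ; jcc
	SAHF:	fnstsw %ax ; sahf ; jcc
	ARITH:	fnstsw %ax ; test/and/cmp on %ah ; jcc  */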
16358 enum ix86_fpcmp_strategy
16359 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16361 /* Do fcomi/sahf based test when profitable. */
16364 return IX86_FPCMP_COMI;
16366 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16367 return IX86_FPCMP_SAHF;
16369 return IX86_FPCMP_ARITH;
16372 /* Swap, force into registers, or otherwise massage the two operands
16373 to a fp comparison. The operands are updated in place; the new
16374 comparison code is returned. */
16376 static enum rtx_code
16377 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16379 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16380 rtx op0 = *pop0, op1 = *pop1;
16381 enum machine_mode op_mode = GET_MODE (op0);
16382 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16384 /* All of the unordered compare instructions only work on registers.
16385 The same is true of the fcomi compare instructions. The XFmode
16386 compare instructions require registers except when comparing
16387 against zero or when converting operand 1 from fixed point to
16391 && (fpcmp_mode == CCFPUmode
16392 || (op_mode == XFmode
16393 && ! (standard_80387_constant_p (op0) == 1
16394 || standard_80387_constant_p (op1) == 1)
16395 && GET_CODE (op1) != FLOAT)
16396 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16398 op0 = force_reg (op_mode, op0);
16399 op1 = force_reg (op_mode, op1);
16403 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16404 things around if they appear profitable, otherwise force op0
16405 into a register. */
16407 if (standard_80387_constant_p (op0) == 0
16409 && ! (standard_80387_constant_p (op1) == 0
16412 enum rtx_code new_code = ix86_fp_swap_condition (code);
16413 if (new_code != UNKNOWN)
16416 tmp = op0, op0 = op1, op1 = tmp;
16422 op0 = force_reg (op_mode, op0);
16424 if (CONSTANT_P (op1))
16426 int tmp = standard_80387_constant_p (op1);
16428 op1 = validize_mem (force_const_mem (op_mode, op1));
16432 op1 = force_reg (op_mode, op1);
16435 op1 = force_reg (op_mode, op1);
16439 /* Try to rearrange the comparison to make it cheaper. */
16440 if (ix86_fp_comparison_cost (code)
16441 > ix86_fp_comparison_cost (swap_condition (code))
16442 && (REG_P (op1) || can_create_pseudo_p ()))
16445 tmp = op0, op0 = op1, op1 = tmp;
16446 code = swap_condition (code);
16448 op0 = force_reg (op_mode, op0);
16456 /* Convert the comparison codes we use to represent an FP comparison to the
16457 integer code that will result in a proper branch. Return UNKNOWN if no such code
16461 ix86_fp_compare_code_to_integer (enum rtx_code code)
16490 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16493 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16495 enum machine_mode fpcmp_mode, intcmp_mode;
16498 fpcmp_mode = ix86_fp_compare_mode (code);
16499 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16501 /* Do fcomi/sahf based test when profitable. */
16502 switch (ix86_fp_comparison_strategy (code))
16504 case IX86_FPCMP_COMI:
16505 intcmp_mode = fpcmp_mode;
16506 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16507 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16512 case IX86_FPCMP_SAHF:
16513 intcmp_mode = fpcmp_mode;
16514 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16515 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16519 scratch = gen_reg_rtx (HImode);
16520 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16521 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16524 case IX86_FPCMP_ARITH:
16525 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16526 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16527 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16529 scratch = gen_reg_rtx (HImode);
16530 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16532 /* In the unordered case, we have to check C2 for NaN's, which
16533 doesn't happen to work out to anything nice combination-wise.
16534 So do some bit twiddling on the value we've got in AH to come
16535 up with an appropriate set of condition codes. */
16537 intcmp_mode = CCNOmode;
16542 if (code == GT || !TARGET_IEEE_FP)
16544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16549 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16550 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16551 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16552 intcmp_mode = CCmode;
16558 if (code == LT && TARGET_IEEE_FP)
16560 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16561 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16562 intcmp_mode = CCmode;
16567 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16573 if (code == GE || !TARGET_IEEE_FP)
16575 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16580 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16581 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16587 if (code == LE && TARGET_IEEE_FP)
16589 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16590 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16591 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16592 intcmp_mode = CCmode;
16597 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16603 if (code == EQ && TARGET_IEEE_FP)
16605 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16606 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16607 intcmp_mode = CCmode;
16612 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16618 if (code == NE && TARGET_IEEE_FP)
16620 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16621 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16627 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16633 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16637 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16642 gcc_unreachable ();
16650 /* Return the test that should be put into the flags user, i.e.
16651 the bcc, scc, or cmov instruction. */
16652 return gen_rtx_fmt_ee (code, VOIDmode,
16653 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16658 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16662 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16663 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16665 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16667 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16668 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16671 ret = ix86_expand_int_compare (code, op0, op1);
16677 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16679 enum machine_mode mode = GET_MODE (op0);
16691 tmp = ix86_expand_compare (code, op0, op1);
16692 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16693 gen_rtx_LABEL_REF (VOIDmode, label),
16695 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16702 /* Expand DImode branch into multiple compare+branch. */
16704 rtx lo[2], hi[2], label2;
16705 enum rtx_code code1, code2, code3;
16706 enum machine_mode submode;
16708 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16710 tmp = op0, op0 = op1, op1 = tmp;
16711 code = swap_condition (code);
16714 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16715 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16717 submode = mode == DImode ? SImode : DImode;
16719 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16720 avoid two branches. This costs one extra insn, so disable when
16721 optimizing for size. */
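/* Sketch of the sequence for DImode a == b on 32-bit targets
   (registers illustrative):

	xorl	hi(b), hi(a)
	xorl	lo(b), lo(a)
	orl	hi(a), lo(a)
	jz	label  */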
16723 if ((code == EQ || code == NE)
16724 && (!optimize_insn_for_size_p ()
16725 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16730 if (hi[1] != const0_rtx)
16731 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16732 NULL_RTX, 0, OPTAB_WIDEN);
16735 if (lo[1] != const0_rtx)
16736 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16737 NULL_RTX, 0, OPTAB_WIDEN);
16739 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16740 NULL_RTX, 0, OPTAB_WIDEN);
16742 ix86_expand_branch (code, tmp, const0_rtx, label);
16746 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16747 op1 is a constant and the low word is zero, then we can just
16748 examine the high word. Similarly for low word -1 and
16749 less-or-equal-than or greater-than. */
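/* E.g. for unsigned "a < 0x500000000", the low word of the constant
   is zero, so the test reduces to "hi(a) < 5", a single
   compare+branch (illustrative).  */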
16751 if (CONST_INT_P (hi[1]))
16754 case LT: case LTU: case GE: case GEU:
16755 if (lo[1] == const0_rtx)
16757 ix86_expand_branch (code, hi[0], hi[1], label);
16761 case LE: case LEU: case GT: case GTU:
16762 if (lo[1] == constm1_rtx)
16764 ix86_expand_branch (code, hi[0], hi[1], label);
16772 /* Otherwise, we need two or three jumps. */
16774 label2 = gen_label_rtx ();
16777 code2 = swap_condition (code);
16778 code3 = unsigned_condition (code);
16782 case LT: case GT: case LTU: case GTU:
16785 case LE: code1 = LT; code2 = GT; break;
16786 case GE: code1 = GT; code2 = LT; break;
16787 case LEU: code1 = LTU; code2 = GTU; break;
16788 case GEU: code1 = GTU; code2 = LTU; break;
16790 case EQ: code1 = UNKNOWN; code2 = NE; break;
16791 case NE: code2 = UNKNOWN; break;
16794 gcc_unreachable ();
16799 * if (hi(a) < hi(b)) goto true;
16800 * if (hi(a) > hi(b)) goto false;
16801 * if (lo(a) < lo(b)) goto true;
16805 if (code1 != UNKNOWN)
16806 ix86_expand_branch (code1, hi[0], hi[1], label);
16807 if (code2 != UNKNOWN)
16808 ix86_expand_branch (code2, hi[0], hi[1], label2);
16810 ix86_expand_branch (code3, lo[0], lo[1], label);
16812 if (code2 != UNKNOWN)
16813 emit_label (label2);
16818 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16823 /* Split a branch based on a floating point condition. */
16825 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16826 rtx target1, rtx target2, rtx tmp, rtx pushed)
16831 if (target2 != pc_rtx)
16834 code = reverse_condition_maybe_unordered (code);
16839 condition = ix86_expand_fp_compare (code, op1, op2,
16842 /* Remove pushed operand from stack. */
16844 ix86_free_from_memory (GET_MODE (pushed));
16846 i = emit_jump_insn (gen_rtx_SET
16848 gen_rtx_IF_THEN_ELSE (VOIDmode,
16849 condition, target1, target2)));
16850 if (split_branch_probability >= 0)
16851 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16855 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16859 gcc_assert (GET_MODE (dest) == QImode);
16861 ret = ix86_expand_compare (code, op0, op1);
16862 PUT_MODE (ret, QImode);
16863 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16866 /* Expand a comparison setting or clearing the carry flag. Return true when
16867 successful and set *POP for the operation. */
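/* E.g. "(unsigned) a < b" becomes "cmp b, a", after which the carry
   flag alone holds the result (LTU: CF=1, GEU: CF=0), ready to feed
   an sbb-based sequence (illustrative).  */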
16869 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16871 enum machine_mode mode =
16872 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16874 /* Do not handle double-mode compares that go through the special path. */
16875 if (mode == (TARGET_64BIT ? TImode : DImode))
16878 if (SCALAR_FLOAT_MODE_P (mode))
16880 rtx compare_op, compare_seq;
16882 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16884 /* Shortcut: the following common codes never translate
16885 into carry flag compares. */
16886 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16887 || code == ORDERED || code == UNORDERED)
16890 /* These comparisons require the zero flag; swap operands so they won't. */
16891 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16892 && !TARGET_IEEE_FP)
16897 code = swap_condition (code);
16900 /* Try to expand the comparison and verify that we end up with a
16901 carry flag based comparison. This fails to be true only when
16902 we decide to expand the comparison using arithmetic, which is
16903 not a common scenario. */
16905 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16906 compare_seq = get_insns ();
16909 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16910 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16911 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16913 code = GET_CODE (compare_op);
16915 if (code != LTU && code != GEU)
16918 emit_insn (compare_seq);
16923 if (!INTEGRAL_MODE_P (mode))
16932 /* Convert a==0 into (unsigned)a<1. */
16935 if (op1 != const0_rtx)
16938 code = (code == EQ ? LTU : GEU);
16941 /* Convert a>b into b<a or a>=b-1. */
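/* E.g. unsigned "a > 5" becomes "a >= 6", which is a pure carry
   flag (GEU) test (illustrative).  */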
16944 if (CONST_INT_P (op1))
16946 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16947 /* Bail out on overflow. We could still swap the operands, but that
16948 would force loading the constant into a register. */
16949 if (op1 == const0_rtx
16950 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16952 code = (code == GTU ? GEU : LTU);
16959 code = (code == GTU ? LTU : GEU);
16963 /* Convert a>=0 into (unsigned)a<0x80000000. */
16966 if (mode == DImode || op1 != const0_rtx)
16968 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16969 code = (code == LT ? GEU : LTU);
16973 if (mode == DImode || op1 != constm1_rtx)
16975 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16976 code = (code == LE ? GEU : LTU);
16982 /* Swapping operands may cause a constant to appear as the first operand. */
16983 if (!nonimmediate_operand (op0, VOIDmode))
16985 if (!can_create_pseudo_p ())
16987 op0 = force_reg (mode, op0);
16989 *pop = ix86_expand_compare (code, op0, op1);
16990 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16995 ix86_expand_int_movcc (rtx operands[])
16997 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16998 rtx compare_seq, compare_op;
16999 enum machine_mode mode = GET_MODE (operands[0]);
17000 bool sign_bit_compare_p = false;
17001 rtx op0 = XEXP (operands[1], 0);
17002 rtx op1 = XEXP (operands[1], 1);
17005 compare_op = ix86_expand_compare (code, op0, op1);
17006 compare_seq = get_insns ();
17009 compare_code = GET_CODE (compare_op);
17011 if ((op1 == const0_rtx && (code == GE || code == LT))
17012 || (op1 == constm1_rtx && (code == GT || code == LE)))
17013 sign_bit_compare_p = true;
17015 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17016 HImode insns, we'd be swallowed in word prefix ops. */
17018 if ((mode != HImode || TARGET_FAST_PREFIX)
17019 && (mode != (TARGET_64BIT ? TImode : DImode))
17020 && CONST_INT_P (operands[2])
17021 && CONST_INT_P (operands[3]))
17023 rtx out = operands[0];
17024 HOST_WIDE_INT ct = INTVAL (operands[2]);
17025 HOST_WIDE_INT cf = INTVAL (operands[3]);
17026 HOST_WIDE_INT diff;
17029 /* Sign bit compares are better done using shifts than by using
17031 if (sign_bit_compare_p
17032 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17034 /* Detect overlap between destination and compare sources. */
17037 if (!sign_bit_compare_p)
17040 bool fpcmp = false;
17042 compare_code = GET_CODE (compare_op);
17044 flags = XEXP (compare_op, 0);
17046 if (GET_MODE (flags) == CCFPmode
17047 || GET_MODE (flags) == CCFPUmode)
17051 = ix86_fp_compare_code_to_integer (compare_code);
17054 /* To simplify the rest of the code, restrict to the GEU case. */
17055 if (compare_code == LTU)
17057 HOST_WIDE_INT tmp = ct;
17060 compare_code = reverse_condition (compare_code);
17061 code = reverse_condition (code);
17066 PUT_CODE (compare_op,
17067 reverse_condition_maybe_unordered
17068 (GET_CODE (compare_op)));
17070 PUT_CODE (compare_op,
17071 reverse_condition (GET_CODE (compare_op)));
17075 if (reg_overlap_mentioned_p (out, op0)
17076 || reg_overlap_mentioned_p (out, op1))
17077 tmp = gen_reg_rtx (mode);
17079 if (mode == DImode)
17080 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17082 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17083 flags, compare_op));
17087 if (code == GT || code == GE)
17088 code = reverse_condition (code);
17091 HOST_WIDE_INT tmp = ct;
17096 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17109 tmp = expand_simple_binop (mode, PLUS,
17111 copy_rtx (tmp), 1, OPTAB_DIRECT);
17122 tmp = expand_simple_binop (mode, IOR,
17124 copy_rtx (tmp), 1, OPTAB_DIRECT);
17126 else if (diff == -1 && ct)
17136 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17138 tmp = expand_simple_binop (mode, PLUS,
17139 copy_rtx (tmp), GEN_INT (cf),
17140 copy_rtx (tmp), 1, OPTAB_DIRECT);
17148 * andl cf - ct, dest
17158 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17161 tmp = expand_simple_binop (mode, AND,
17163 gen_int_mode (cf - ct, mode),
17164 copy_rtx (tmp), 1, OPTAB_DIRECT);
17166 tmp = expand_simple_binop (mode, PLUS,
17167 copy_rtx (tmp), GEN_INT (ct),
17168 copy_rtx (tmp), 1, OPTAB_DIRECT);
17171 if (!rtx_equal_p (tmp, out))
17172 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17179 enum machine_mode cmp_mode = GET_MODE (op0);
17182 tmp = ct, ct = cf, cf = tmp;
17185 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17187 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17189 /* We may be reversing an unordered compare to a normal compare, which
17190 is not valid in general (we may convert a non-trapping condition
17191 to a trapping one); however, on i386 we currently emit all
17192 comparisons unordered. */
17193 compare_code = reverse_condition_maybe_unordered (compare_code);
17194 code = reverse_condition_maybe_unordered (code);
17198 compare_code = reverse_condition (compare_code);
17199 code = reverse_condition (code);
17203 compare_code = UNKNOWN;
17204 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17205 && CONST_INT_P (op1))
17207 if (op1 == const0_rtx
17208 && (code == LT || code == GE))
17209 compare_code = code;
17210 else if (op1 == constm1_rtx)
17214 else if (code == GT)
17219 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17220 if (compare_code != UNKNOWN
17221 && GET_MODE (op0) == GET_MODE (out)
17222 && (cf == -1 || ct == -1))
17224 /* If the lea code below could be used, only optimize
17225 if it results in a 2-insn sequence. */
17227 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17228 || diff == 3 || diff == 5 || diff == 9)
17229 || (compare_code == LT && ct == -1)
17230 || (compare_code == GE && cf == -1))
17233 * notl op1 (if necessary)
17241 code = reverse_condition (code);
17244 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17246 out = expand_simple_binop (mode, IOR,
17248 out, 1, OPTAB_DIRECT);
17249 if (out != operands[0])
17250 emit_move_insn (operands[0], out);
17257 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17258 || diff == 3 || diff == 5 || diff == 9)
17259 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17261 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17267 * lea cf(dest*(ct-cf)),dest
17271 * This also catches the degenerate setcc-only case.
17277 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17280 /* On x86_64 the lea instruction operates on Pmode, so we need
17281 to get the arithmetic done in the proper mode to match. */
17283 tmp = copy_rtx (out);
17287 out1 = copy_rtx (out);
17288 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17292 tmp = gen_rtx_PLUS (mode, tmp, out1);
17298 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17301 if (!rtx_equal_p (tmp, out))
17304 out = force_operand (tmp, copy_rtx (out));
17306 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17308 if (!rtx_equal_p (out, operands[0]))
17309 emit_move_insn (operands[0], copy_rtx (out));
17315 * General case: Jumpful:
17316 * xorl dest,dest cmpl op1, op2
17317 * cmpl op1, op2 movl ct, dest
17318 * setcc dest jcc 1f
17319 * decl dest movl cf, dest
17320 * andl (cf-ct),dest 1:
17323 * Size 20. Size 14.
17325 * This is reasonably steep, but branch mispredict costs are
17326 * high on modern CPUs, so consider failing only if optimizing
17330 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17331 && BRANCH_COST (optimize_insn_for_speed_p (),
17336 enum machine_mode cmp_mode = GET_MODE (op0);
17341 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17343 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17345 /* We may be reversing an unordered compare to a normal compare,
17346 which is not valid in general (we may convert a non-trapping
17347 condition to a trapping one); however, on i386 we currently
17348 emit all comparisons unordered. */
17349 code = reverse_condition_maybe_unordered (code);
17353 code = reverse_condition (code);
17354 if (compare_code != UNKNOWN)
17355 compare_code = reverse_condition (compare_code);
17359 if (compare_code != UNKNOWN)
17361 /* notl op1 (if needed)
17366 For x < 0 (resp. x <= -1) there will be no notl,
17367 so if possible swap the constants to get rid of the
17369 True/false will be -1/0 while code below (store flag
17370 followed by decrement) is 0/-1, so the constants need
17371 to be exchanged once more. */
17373 if (compare_code == GE || !cf)
17375 code = reverse_condition (code);
17380 HOST_WIDE_INT tmp = cf;
17385 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17389 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17391 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17393 copy_rtx (out), 1, OPTAB_DIRECT);
17396 out = expand_simple_binop (mode, AND, copy_rtx (out),
17397 gen_int_mode (cf - ct, mode),
17398 copy_rtx (out), 1, OPTAB_DIRECT);
17400 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17401 copy_rtx (out), 1, OPTAB_DIRECT);
17402 if (!rtx_equal_p (out, operands[0]))
17403 emit_move_insn (operands[0], copy_rtx (out));
17409 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17411 /* Try a few more things with specific constants and a variable. */
17414 rtx var, orig_out, out, tmp;
17416 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17419 /* If one of the two operands is an interesting constant, load a
17420 constant with the above and mask it in with a logical operation. */
17422 if (CONST_INT_P (operands[2]))
17425 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17426 operands[3] = constm1_rtx, op = and_optab;
17427 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17428 operands[3] = const0_rtx, op = ior_optab;
17432 else if (CONST_INT_P (operands[3]))
17435 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17436 operands[2] = constm1_rtx, op = and_optab;
17437 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
17438 operands[2] = const0_rtx, op = ior_optab;
17445 orig_out = operands[0];
17446 tmp = gen_reg_rtx (mode);
17449 /* Recurse to get the constant loaded. */
17450 if (ix86_expand_int_movcc (operands) == 0)
17453 /* Mask in the interesting variable. */
17454 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17456 if (!rtx_equal_p (out, orig_out))
17457 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17463 * For comparison with above,
17473 if (! nonimmediate_operand (operands[2], mode))
17474 operands[2] = force_reg (mode, operands[2]);
17475 if (! nonimmediate_operand (operands[3], mode))
17476 operands[3] = force_reg (mode, operands[3]);
17478 if (! register_operand (operands[2], VOIDmode)
17480 || ! register_operand (operands[3], VOIDmode)))
17481 operands[2] = force_reg (mode, operands[2]);
17484 && ! register_operand (operands[3], VOIDmode))
17485 operands[3] = force_reg (mode, operands[3]);
17487 emit_insn (compare_seq);
17488 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17489 gen_rtx_IF_THEN_ELSE (mode,
17490 compare_op, operands[2],
17495 /* Swap, force into registers, or otherwise massage the two operands
17496 to an sse comparison with a mask result. Thus we differ a bit from
17497 ix86_prepare_fp_compare_args which expects to produce a flags result.
17499 The DEST operand exists to help determine whether to commute commutative
17500 operators. The POP0/POP1 operands are updated in place. The new
17501 comparison code is returned, or UNKNOWN if not implementable. */
17503 static enum rtx_code
17504 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17505 rtx *pop0, rtx *pop1)
17513 /* We have no LTGT as an operator. We could implement it with
17514 NE & ORDERED, but this requires an extra temporary. It's
17515 not clear that it's worth it. */
17522 /* These are supported directly. */
17529 /* For commutative operators, try to canonicalize the destination
17530 operand to be first in the comparison - this helps reload to
17531 avoid extra moves. */
17532 if (!dest || !rtx_equal_p (dest, *pop1))
17540 /* These are not supported directly. Swap the comparison operands
17541 to transform into something that is supported. */
17545 code = swap_condition (code);
17549 gcc_unreachable ();
17555 /* Detect conditional moves that exactly match min/max operational
17556 semantics. Note that this is IEEE safe, as long as we don't
17557 interchange the operands.
17559 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17560 and TRUE if the operation is successful and instructions are emitted. */
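/* E.g. "a < b ? a : b" maps directly onto minss/minsd.  The SSE
   min/max instructions return the second operand when the inputs are
   unordered, which is why the operand order must not be swapped
   (an illustrative note, not emitted code).  */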
17563 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17564 rtx cmp_op1, rtx if_true, rtx if_false)
17566 enum machine_mode mode;
17572 else if (code == UNGE)
17575 if_true = if_false;
17581 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17583 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17588 mode = GET_MODE (dest);
17590 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17591 but MODE may be a vector mode and thus not appropriate. */
17592 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17594 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17597 if_true = force_reg (mode, if_true);
17598 v = gen_rtvec (2, if_true, if_false);
17599 tmp = gen_rtx_UNSPEC (mode, v, u);
17603 code = is_min ? SMIN : SMAX;
17604 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17607 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17611 /* Expand an sse vector comparison. Return the register with the result. */
17614 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17615 rtx op_true, rtx op_false)
17617 enum machine_mode mode = GET_MODE (dest);
17620 cmp_op0 = force_reg (mode, cmp_op0);
17621 if (!nonimmediate_operand (cmp_op1, mode))
17622 cmp_op1 = force_reg (mode, cmp_op1);
17625 || reg_overlap_mentioned_p (dest, op_true)
17626 || reg_overlap_mentioned_p (dest, op_false))
17627 dest = gen_reg_rtx (mode);
17629 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17630 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17635 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17636 operations. This is used for both scalar and vector conditional moves. */
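/* The general case is the classic three-instruction mask blend,
   roughly
	dest = (cmp & op_true) | (~cmp & op_false);
   the special cases below drop a term when one arm is zero, and
   TARGET_XOP can use a single pcmov instead (sketch only).  */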
17639 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17641 enum machine_mode mode = GET_MODE (dest);
17644 if (op_false == CONST0_RTX (mode))
17646 op_true = force_reg (mode, op_true);
17647 x = gen_rtx_AND (mode, cmp, op_true);
17648 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17650 else if (op_true == CONST0_RTX (mode))
17652 op_false = force_reg (mode, op_false);
17653 x = gen_rtx_NOT (mode, cmp);
17654 x = gen_rtx_AND (mode, x, op_false);
17655 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17657 else if (TARGET_XOP)
17659 rtx pcmov = gen_rtx_SET (mode, dest,
17660 gen_rtx_IF_THEN_ELSE (mode, cmp,
17667 op_true = force_reg (mode, op_true);
17668 op_false = force_reg (mode, op_false);
17670 t2 = gen_reg_rtx (mode);
17672 t3 = gen_reg_rtx (mode);
17676 x = gen_rtx_AND (mode, op_true, cmp);
17677 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17679 x = gen_rtx_NOT (mode, cmp);
17680 x = gen_rtx_AND (mode, x, op_false);
17681 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17683 x = gen_rtx_IOR (mode, t3, t2);
17684 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17688 /* Expand a floating-point conditional move. Return true if successful. */
17691 ix86_expand_fp_movcc (rtx operands[])
17693 enum machine_mode mode = GET_MODE (operands[0]);
17694 enum rtx_code code = GET_CODE (operands[1]);
17695 rtx tmp, compare_op;
17696 rtx op0 = XEXP (operands[1], 0);
17697 rtx op1 = XEXP (operands[1], 1);
17699 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17701 enum machine_mode cmode;
17703 /* Since we have no cmove for sse registers, don't force bad register
17704 allocation just to gain access to it. Deny movcc when the
17705 comparison mode doesn't match the move mode. */
17706 cmode = GET_MODE (op0);
17707 if (cmode == VOIDmode)
17708 cmode = GET_MODE (op1);
17712 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17713 if (code == UNKNOWN)
17716 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17717 operands[2], operands[3]))
17720 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17721 operands[2], operands[3]);
17722 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17726 /* The floating point conditional move instructions don't directly
17727 support conditions resulting from a signed integer comparison. */
17729 compare_op = ix86_expand_compare (code, op0, op1);
17730 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17732 tmp = gen_reg_rtx (QImode);
17733 ix86_expand_setcc (tmp, code, op0, op1);
17735 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17738 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17739 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17740 operands[2], operands[3])));
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* XOP supports all of the comparisons on all vector int types.  */
  if (!TARGET_XOP)
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ: case GT: case GTU:
          break;
        case NE: case LE: case LEU:
          code = reverse_condition (code);
          negate = true;
          break;
        case GE: case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */
        case LT: case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;
        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        switch (code)
          {
          case EQ:
            /* SSE4.1 supports EQ.  */
            if (!TARGET_SSE4_1)
              return false;
            break;
          case GT: case GTU:
            /* SSE4.2 supports GT/GTU.  */
            if (!TARGET_SSE4_2)
              return false;
            break;
          default:
            gcc_unreachable ();
          }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);
          switch (mode)
            {
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
                                                true, false);
                gen_sub3 = (mode == V4SImode
                            ? gen_subv4si3 : gen_subv2di3);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));
                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));
                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));
              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                           operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
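/* Example of the unsigned-compare trick above, for V4SImode lanes:
   x >u y  <==>  (x - 0x80000000) >s (y - 0x80000000),
   i.e. biasing both operands by INT_MIN turns GTU into GT.  For the
   QImode/HImode lanes the comparison is instead rewritten as
   (x -us y) != 0 using unsigned saturating subtraction, with the
   inversion (EQ plus flipped NEGATE) folded into the operand swap.  */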
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx (*unpack)(rtx, rtx, rtx);
  rtx se, dest;

  switch (imode)
    {
    case V16QImode:
      unpack = (high_p ? gen_vec_interleave_highv16qi
                       : gen_vec_interleave_lowv16qi);
      break;
    case V8HImode:
      unpack = (high_p ? gen_vec_interleave_highv8hi
                       : gen_vec_interleave_lowv8hi);
      break;
    case V4SImode:
      unpack = (high_p ? gen_vec_interleave_highv4si
                       : gen_vec_interleave_lowv4si);
      break;
    default:
      gcc_unreachable ();
    }

  dest = gen_lowpart (imode, operands[0]);

  if (unsigned_p)
    se = force_reg (imode, CONST0_RTX (imode));
  else
    se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                              operands[1], pc_rtx, pc_rtx);

  emit_insn (unpack (dest, operands[1], se));
}
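/* SE provides the high half of each widened element: zero for zero
   extension, or a lane-wise sign mask for sign extension.  The mask
   comes from the comparison 0 > operands[1], which yields all-ones in
   exactly the lanes whose sign bit is set, so interleaving each element
   with SE widens it to the next vector mode.  */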
/* This function performs the same task as ix86_expand_sse_unpack,
   but with SSE4.1 instructions.  */

void
ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx (*unpack)(rtx, rtx);
  rtx src, dest;

  switch (imode)
    {
    case V16QImode:
      unpack = (unsigned_p ? gen_sse4_1_zero_extendv8qiv8hi2
                           : gen_sse4_1_sign_extendv8qiv8hi2);
      break;
    case V8HImode:
      unpack = (unsigned_p ? gen_sse4_1_zero_extendv4hiv4si2
                           : gen_sse4_1_sign_extendv4hiv4si2);
      break;
    case V4SImode:
      unpack = (unsigned_p ? gen_sse4_1_zero_extendv2siv2di2
                           : gen_sse4_1_sign_extendv2siv2di2);
      break;
    default:
      gcc_unreachable ();
    }

  dest = operands[0];
  if (high_p)
    {
      /* Shift higher 8 bytes to lower 8 bytes.  */
      src = gen_reg_rtx (imode);
      emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
                                     gen_lowpart (V1TImode, operands[1]),
                                     GEN_INT (64)));
    }
  else
    src = operands[1];

  emit_insn (unpack (dest, src));
}
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case QImode: insn = gen_subqi3_carry; break;
        case HImode: insn = gen_subhi3_carry; break;
        case SImode: insn = gen_subsi3_carry; break;
        case DImode: insn = gen_subdi3_carry; break;
        default: gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode: insn = gen_addqi3_carry; break;
        case HImode: insn = gen_addhi3_carry; break;
        case SImode: insn = gen_addsi3_carry; break;
        case DImode: insn = gen_adddi3_carry; break;
        default: gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
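/* For instance, "if (a < b) x++;" with unsigned operands can be emitted as
       cmp a, b        ; CF = (a <u b)
       adc x, 0        ; x += CF
   and the decrement variant uses sbb.  VAL becomes -1 when the condition
   had to be reversed above, since x + CF - 1 == x - !CF.  */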
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case TFmode:
                  real_to_target (l, &r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
          && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
        src_base = plus_constant (src_base, 4);

      /* src_base refers to the stack pointer and is
         automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
        part[1][i] = change_address (part[1][i],
                                     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
        {
          collisionparts[i]
            = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
          if (collisionparts[i])
            collisions++;
        }

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts [1])
        {
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }
      else if (collisions == 1
               && nparts == 4
               && (collisionparts [1] || collisionparts [2]))
        {
          if (collisionparts [1])
            {
              tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
              tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
            }
          else
            {
              tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
              tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
            }
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          for (i = 1; i < nparts; i++)
            {
              tmp = plus_constant (base, UNITS_PER_WORD * i);
              part[1][i] = replace_equiv_address (part[1][i], tmp);
            }
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
          else if (nparts == 4)
            {
              emit_move_insn (part[0][3], part[1][3]);
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these come from an attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))
           || (nparts == 4
               && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
        {
          operands[2 + i] = part[0][j];
          operands[6 + i] = part[1][j];
        }
    }
  else
    {
      for (i = 0; i < nparts; i++)
        {
          operands[2 + i] = part[0][i];
          operands[6 + i] = part[1][i];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
        if (CONST_INT_P (operands[6 + j])
            && operands[6 + j] != const0_rtx
            && REG_P (operands[2 + j]))
          for (i = j; i < nparts - 1; i++)
            if (CONST_INT_P (operands[7 + i])
                && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
              operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);

  return;
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
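/* Each "add reg, reg" doubles the value, so N adds shift left by N.
   A constant shift by 2 may thus become two adds when, per the tuned
   cost table, two adds are cheaper than one shift-by-immediate.  */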
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }
      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          enum machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
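/* The non-constant path above is the standard double-word shift sequence:
       shld high, low, cl   ; high = high<<cl | low>>(width-cl)
       shl  low, cl
   followed by an adjustment that moves LOW into HIGH and clears LOW when
   the masked count is >= half_width, either branchlessly with cmove
   (the adj_1 patterns) or with a conditional jump (adj_2).  */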
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
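/* PROB is measured against REG_BR_PROB_BASE (10000), so for example
   predict_jump (REG_BR_PROB_BASE * 90 / 100) marks the just-emitted
   branch as taken 90% of the time.  */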
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
static rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}
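/* SCALE is always a power of two here (it is the chunk size of a string
   operation), so the division can be done as a logical shift right by
   exact_log2 (scale).  */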
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output simple loop to move memory
   pointed to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using single temporary.
         Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
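/* The generated control flow is, in outline:
       size = count & -piece_size;
       if (size == 0) goto out;        // emitted only when piece_size == 1
       iter = 0;
     top:
       copy or store one unrolled chunk at base + iter;
       iter += piece_size;
       if (iter < size) goto top;
     out:
       destptr += iter;                // and srcptr likewise for copies
   */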
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      count = GEN_INT (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, count);
      set_mem_size (srcmem, count);
    }
  else
    {
      if (MEM_SIZE (destmem))
        set_mem_size (destmem, NULL_RTX);
      if (MEM_SIZE (srcmem))
        set_mem_size (srcmem, NULL_RTX);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}
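/* DESTEXP and SRCEXP describe the final pointer values to the rep_mov
   pattern: ptr + countreg scaled by the chunk size.  Keeping them as
   explicit rtx lets the pattern expose the side effect of rep movs on
   the destination and source pointer registers to the optimizers.  */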
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      count = GEN_INT (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, count);
    }
  else if (MEM_SIZE (destmem))
    set_mem_size (destmem, NULL_RTX);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
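/* Both branches above form a jump tree over the low bits of COUNT:
   each power-of-two residue from max_size/2 down to 1 is tested with
   ix86_expand_aligntest and handled with a move of exactly that width,
   so at most log2(max_size) branches dispose of any remainder.  */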
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx src_size, dst_size;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  src_size = MEM_SIZE (src);
  dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
  *srcp = src;
  return dst;
}
/* Set enough bytes at DEST, known to be aligned by ALIGN, to align it to
   DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Set enough bytes at DST, known to be aligned by ALIGN, to align it to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                    \
                           || (alg != rep_prefix_1_byte         \
                               && alg != rep_prefix_4_byte      \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
    return stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
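/* The tables consulted above are the stringop_algs arrays in the
   per-processor cost structures; each entry gives an upper size bound
   and the algorithm to use below it, with max == -1 serving as the
   catch-all final entry and max == 0 terminating the list early.  */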
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
      case no_stringop:
        gcc_unreachable ();
      case loop:
      case unrolled_loop:
        desired_align = GET_MODE_SIZE (Pmode);
        break;
      case rep_prefix_8_byte:
        desired_align = 8;
        break;
      case rep_prefix_4_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks,
           copying whole cacheline at once.  */
        if (TARGET_PENTIUMPRO)
          desired_align = 8;
        else
          desired_align = 4;
        break;
      case rep_prefix_1_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks,
           copying whole cacheline at once.  */
        if (TARGET_PENTIUMPRO)
          desired_align = 8;
        else
          desired_align = 1;
        break;
      case loop_1_byte:
        desired_align = 1;
        break;
      case libcall:
        return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
19777 /* Expand string move (memcpy) operation. Use i386 string operations when
19778 profitable. expand_setmem contains similar code. The code depends upon
19779 architecture, block size and alignment, but always has the same
19782 1) Prologue guard: Conditional that jumps up to epilogues for small
19783 blocks that can be handled by epilogue alone. This is faster but
19784 also needed for correctness, since prologue assume the block is larger
19785 than the desired alignment.
19787 Optional dynamic check for size and libcall for large
19788 blocks is emitted here too, with -minline-stringops-dynamically.
19790 2) Prologue: copy first few bytes in order to get destination aligned
19791 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19792 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19793 We emit either a jump tree on power of two sized blocks, or a byte loop.
19795 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19796 with specified algorithm.
19798 4) Epilogue: code copying tail of the block that is too small to be
19799 handled by main body (or up to size guarded by prologue guard). */
19802 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19803 rtx expected_align_exp, rtx expected_size_exp)
19809 rtx jump_around_label = NULL;
19810 HOST_WIDE_INT align = 1;
19811 unsigned HOST_WIDE_INT count = 0;
19812 HOST_WIDE_INT expected_size = -1;
19813 int size_needed = 0, epilogue_size_needed;
19814 int desired_align = 0, align_bytes = 0;
19815 enum stringop_alg alg;
19817 bool need_zero_guard = false;
19819 if (CONST_INT_P (align_exp))
19820 align = INTVAL (align_exp);
19821 /* i386 can do misaligned access on reasonably increased cost. */
19822 if (CONST_INT_P (expected_align_exp)
19823 && INTVAL (expected_align_exp) > align)
19824 align = INTVAL (expected_align_exp);
19825 /* ALIGN is the minimum of destination and source alignment, but we care here
19826 just about destination alignment. */
19827 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19828 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19830 if (CONST_INT_P (count_exp))
19831 count = expected_size = INTVAL (count_exp);
19832 if (CONST_INT_P (expected_size_exp) && count == 0)
19833 expected_size = INTVAL (expected_size_exp);
19835 /* Make sure we don't need to care about overflow later on. */
19836 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19839 /* Step 0: Decide on preferred algorithm, desired alignment and
19840 size of chunks to be copied by main loop. */
19842 alg = decide_alg (count, expected_size, false, &dynamic_check);
19843 desired_align = decide_alignment (align, alg, expected_size);
19845 if (!TARGET_ALIGN_STRINGOPS)
19846 align = desired_align;
19848 if (alg == libcall)
19850 gcc_assert (alg != no_stringop);
19852 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19853 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19854 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19859 gcc_unreachable ();
19861 need_zero_guard = true;
19862 size_needed = GET_MODE_SIZE (Pmode);
19864 case unrolled_loop:
19865 need_zero_guard = true;
19866 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19868 case rep_prefix_8_byte:
19871 case rep_prefix_4_byte:
19874 case rep_prefix_1_byte:
19878 need_zero_guard = true;
19883 epilogue_size_needed = size_needed;
19885 /* Step 1: Prologue guard. */
19887 /* Alignment code needs count to be in a register. */
19888 if (CONST_INT_P (count_exp) && desired_align > align)
19890 if (INTVAL (count_exp) > desired_align
19891 && INTVAL (count_exp) > size_needed)
19894 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19895 if (align_bytes <= 0)
19898 align_bytes = desired_align - align_bytes;
19900 if (align_bytes == 0)
19901 count_exp = force_reg (counter_mode (count_exp), count_exp);
19903 gcc_assert (desired_align >= 1 && align >= 1);
19905 /* Ensure that alignment prologue won't copy past end of block. */
19906 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19908 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19909 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19910 Make sure it is power of 2. */
19911 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
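/* Illustrative worked example (added for exposition): with SIZE_NEEDED == 8,
   DESIRED_ALIGN == 8 and ALIGN == 1, the MAX above yields 7, and
   smallest_pow2_greater_than rounds it up to EPILOGUE_SIZE_NEEDED == 8;
   for a runtime count of 23 the epilogue then handles 23 & (8 - 1) == 7
   trailing bytes. */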
19915 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19917 /* If main algorithm works on QImode, no epilogue is needed.
19918 For small sizes just don't align anything. */
19919 if (size_needed == 1)
19920 desired_align = align;
19927 label = gen_label_rtx ();
19928 emit_cmp_and_jump_insns (count_exp,
19929 GEN_INT (epilogue_size_needed),
19930 LTU, 0, counter_mode (count_exp), 1, label);
19931 if (expected_size == -1 || expected_size < epilogue_size_needed)
19932 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19934 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19938 /* Emit code to decide at runtime whether library call or inline should be
19939 used. */
19940 if (dynamic_check != -1)
19942 if (CONST_INT_P (count_exp))
19944 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19946 emit_block_move_via_libcall (dst, src, count_exp, false);
19947 count_exp = const0_rtx;
19953 rtx hot_label = gen_label_rtx ();
19954 jump_around_label = gen_label_rtx ();
19955 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19956 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19957 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19958 emit_block_move_via_libcall (dst, src, count_exp, false);
19959 emit_jump (jump_around_label);
19960 emit_label (hot_label);
19964 /* Step 2: Alignment prologue. */
19966 if (desired_align > align)
19968 if (align_bytes == 0)
19970 /* Except for the first move in epilogue, we no longer know
19971 constant offset in aliasing info. It doesn't seem worth
19972 the pain to maintain it for the first move, so throw away
19973 the info early. */
19974 src = change_address (src, BLKmode, srcreg);
19975 dst = change_address (dst, BLKmode, destreg);
19976 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19981 /* If we know how many bytes need to be stored before dst is
19982 sufficiently aligned, maintain aliasing info accurately. */
19983 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19984 desired_align, align_bytes);
19985 count_exp = plus_constant (count_exp, -align_bytes);
19986 count -= align_bytes;
19988 if (need_zero_guard
19989 && (count < (unsigned HOST_WIDE_INT) size_needed
19990 || (align_bytes == 0
19991 && count < ((unsigned HOST_WIDE_INT) size_needed
19992 + desired_align - align))))
19994 /* It is possible that we copied enough so the main loop will not
19995 execute. */
19996 gcc_assert (size_needed > 1);
19997 if (label == NULL_RTX)
19998 label = gen_label_rtx ();
19999 emit_cmp_and_jump_insns (count_exp,
20000 GEN_INT (size_needed),
20001 LTU, 0, counter_mode (count_exp), 1, label);
20002 if (expected_size == -1
20003 || expected_size < (desired_align - align) / 2 + size_needed)
20004 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20006 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20009 if (label && size_needed == 1)
20011 emit_label (label);
20012 LABEL_NUSES (label) = 1;
20014 epilogue_size_needed = 1;
20016 else if (label == NULL_RTX)
20017 epilogue_size_needed = size_needed;
20019 /* Step 3: Main loop. */
20025 gcc_unreachable ();
20027 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20028 count_exp, QImode, 1, expected_size);
20031 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20032 count_exp, Pmode, 1, expected_size);
20034 case unrolled_loop:
20035 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20036 registers for 4 temporaries anyway. */
20037 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20038 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20041 case rep_prefix_8_byte:
20042 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20045 case rep_prefix_4_byte:
20046 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20049 case rep_prefix_1_byte:
20050 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20054 /* Properly adjust the offsets of src and dest memory for aliasing. */
20055 if (CONST_INT_P (count_exp))
20057 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20058 (count / size_needed) * size_needed);
20059 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20060 (count / size_needed) * size_needed);
20064 src = change_address (src, BLKmode, srcreg);
20065 dst = change_address (dst, BLKmode, destreg);
20068 /* Step 4: Epilogue to copy the remaining bytes. */
20072 /* When the main loop is done, COUNT_EXP might hold original count,
20073 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20074 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20075 bytes. Compensate if needed. */
20077 if (size_needed < epilogue_size_needed)
20080 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20081 GEN_INT (size_needed - 1), count_exp, 1,
20083 if (tmp != count_exp)
20084 emit_move_insn (count_exp, tmp);
20086 emit_label (label);
20087 LABEL_NUSES (label) = 1;
20090 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20091 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20092 epilogue_size_needed);
20093 if (jump_around_label)
20094 emit_label (jump_around_label);
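/* Illustrative sketch (added for exposition, not part of the original code):
   the shape of the code emitted by ix86_expand_movmem above, written as
   plain C. DESIRED_ALIGN, SIZE_NEEDED and EPILOGUE_SIZE_NEEDED stand for
   the values computed in step 0, and copy_chunk is a hypothetical helper
   standing in for the main-loop body. */
#if 0
static void
movmem_shape (char *dst, const char *src, unsigned long count)
{
  /* 1) Prologue guard: small blocks go straight to the epilogue. */
  if (count < EPILOGUE_SIZE_NEEDED)
    goto epilogue;
  /* 2) Prologue: copy up to DESIRED_ALIGN - ALIGN bytes to align DST. */
  while (((unsigned long) dst & (DESIRED_ALIGN - 1)) != 0)
    {
      *dst++ = *src++;
      count--;
    }
  /* 3) Main body: copy SIZE_NEEDED bytes per iteration. */
  while (count >= SIZE_NEEDED)
    {
      copy_chunk (dst, src);
      dst += SIZE_NEEDED;
      src += SIZE_NEEDED;
      count -= SIZE_NEEDED;
    }
 epilogue:
  /* 4) Epilogue: the remaining tail, fewer than SIZE_NEEDED bytes
     (or fewer than EPILOGUE_SIZE_NEEDED when the guard was taken). */
  while (count-- != 0)
    *dst++ = *src++;
}
#endif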
20098 /* Helper function for memset. For QImode value 0xXY produce
20099 0xXYXYXYXY of the width specified by MODE. This is essentially
20100 a * 0x01010101, but we can do slightly better than
20101 synth_mult by unwinding the sequence by hand on CPUs with
20102 slow multiply. */
20104 promote_duplicated_reg (enum machine_mode mode, rtx val)
20106 enum machine_mode valmode = GET_MODE (val);
20108 int nops = mode == DImode ? 3 : 2;
20110 gcc_assert (mode == SImode || mode == DImode);
20111 if (val == const0_rtx)
20112 return copy_to_mode_reg (mode, const0_rtx);
20113 if (CONST_INT_P (val))
20115 HOST_WIDE_INT v = INTVAL (val) & 255;
20117 v |= v << 8;
20118 v |= v << 16;
20119 if (mode == DImode)
20120 v |= (v << 16) << 16;
20121 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20124 if (valmode == VOIDmode)
20126 if (valmode != QImode)
20127 val = gen_lowpart (QImode, val);
20128 if (mode == QImode)
20130 if (!TARGET_PARTIAL_REG_STALL)
20132 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20133 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20134 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20135 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20137 rtx reg = convert_modes (mode, QImode, val, true);
20138 tmp = promote_duplicated_reg (mode, const1_rtx);
20139 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20144 rtx reg = convert_modes (mode, QImode, val, true);
20146 if (!TARGET_PARTIAL_REG_STALL)
20147 if (mode == SImode)
20148 emit_insn (gen_movsi_insv_1 (reg, reg));
20150 emit_insn (gen_movdi_insv_1 (reg, reg));
20153 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20154 NULL, 1, OPTAB_DIRECT);
20156 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20158 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20159 NULL, 1, OPTAB_DIRECT);
20160 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20161 if (mode == SImode)
20163 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20164 NULL, 1, OPTAB_DIRECT);
20165 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
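/* Illustrative sketch (added for exposition): the SImode shift-and-IOR
   sequence emitted just above corresponds to the following plain C;
   duplicate_byte_si is a name used only in this sketch. */
#if 0
static unsigned int
duplicate_byte_si (unsigned char b)
{
  unsigned int x = b;  /* 0x000000XY */
  x |= x << 8;         /* 0x0000XYXY */
  x |= x << 16;        /* 0xXYXYXYXY */
  return x;
}
#endif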
20170 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
20171 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
20172 getting alignment from ALIGN to DESIRED_ALIGN. */
20174 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20179 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20180 promoted_val = promote_duplicated_reg (DImode, val);
20181 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20182 promoted_val = promote_duplicated_reg (SImode, val);
20183 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20184 promoted_val = promote_duplicated_reg (HImode, val);
20186 promoted_val = val;
20188 return promoted_val;
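/* Example (added for exposition): on a 64-bit target, a rep_prefix_8_byte
   expansion with SIZE_NEEDED == 8, DESIRED_ALIGN == 8 and ALIGN == 1 takes
   the size_needed > 4 branch above, so a single DImode register holds the
   duplicated byte for both the alignment prologue and the main loop. */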
20191 /* Expand string set operation (memset). Use i386 string operations when
20192 profitable. See expand_movmem comment for explanation of individual
20193 steps performed. */
20195 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20196 rtx expected_align_exp, rtx expected_size_exp)
20201 rtx jump_around_label = NULL;
20202 HOST_WIDE_INT align = 1;
20203 unsigned HOST_WIDE_INT count = 0;
20204 HOST_WIDE_INT expected_size = -1;
20205 int size_needed = 0, epilogue_size_needed;
20206 int desired_align = 0, align_bytes = 0;
20207 enum stringop_alg alg;
20208 rtx promoted_val = NULL;
20209 bool force_loopy_epilogue = false;
20211 bool need_zero_guard = false;
20213 if (CONST_INT_P (align_exp))
20214 align = INTVAL (align_exp);
20215 /* i386 can do misaligned access at a reasonably increased cost. */
20216 if (CONST_INT_P (expected_align_exp)
20217 && INTVAL (expected_align_exp) > align)
20218 align = INTVAL (expected_align_exp);
20219 if (CONST_INT_P (count_exp))
20220 count = expected_size = INTVAL (count_exp);
20221 if (CONST_INT_P (expected_size_exp) && count == 0)
20222 expected_size = INTVAL (expected_size_exp);
20224 /* Make sure we don't need to care about overflow later on. */
20225 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20228 /* Step 0: Decide on preferred algorithm, desired alignment and
20229 size of chunks to be copied by main loop. */
20231 alg = decide_alg (count, expected_size, true, &dynamic_check);
20232 desired_align = decide_alignment (align, alg, expected_size);
20234 if (!TARGET_ALIGN_STRINGOPS)
20235 align = desired_align;
20237 if (alg == libcall)
20239 gcc_assert (alg != no_stringop);
20241 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20242 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20247 gcc_unreachable ();
20249 need_zero_guard = true;
20250 size_needed = GET_MODE_SIZE (Pmode);
20252 case unrolled_loop:
20253 need_zero_guard = true;
20254 size_needed = GET_MODE_SIZE (Pmode) * 4;
20256 case rep_prefix_8_byte:
20259 case rep_prefix_4_byte:
20262 case rep_prefix_1_byte:
20266 need_zero_guard = true;
20270 epilogue_size_needed = size_needed;
20272 /* Step 1: Prologue guard. */
20274 /* Alignment code needs count to be in a register. */
20275 if (CONST_INT_P (count_exp) && desired_align > align)
20277 if (INTVAL (count_exp) > desired_align
20278 && INTVAL (count_exp) > size_needed)
20281 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20282 if (align_bytes <= 0)
20285 align_bytes = desired_align - align_bytes;
20287 if (align_bytes == 0)
20289 enum machine_mode mode = SImode;
20290 if (TARGET_64BIT && (count & ~0xffffffff))
20292 count_exp = force_reg (mode, count_exp);
20295 /* Do the cheap promotion to allow better CSE across the
20296 main loop and epilogue (i.e., one load of the big constant in
20297 front of all the code). */
20298 if (CONST_INT_P (val_exp))
20299 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20300 desired_align, align);
20301 /* Ensure that alignment prologue won't copy past end of block. */
20302 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20304 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20305 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20306 Make sure it is power of 2. */
20307 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20309 /* To improve performance of small blocks, we jump around the VAL
20310 promoting code. This means that if the promoted VAL is not constant,
20311 we might not use it in the epilogue and have to use the byte
20312 loop variant. */
20313 if (epilogue_size_needed > 2 && !promoted_val)
20314 force_loopy_epilogue = true;
20317 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20319 /* If main algorithm works on QImode, no epilogue is needed.
20320 For small sizes just don't align anything. */
20321 if (size_needed == 1)
20322 desired_align = align;
20329 label = gen_label_rtx ();
20330 emit_cmp_and_jump_insns (count_exp,
20331 GEN_INT (epilogue_size_needed),
20332 LTU, 0, counter_mode (count_exp), 1, label);
20333 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20334 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20336 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20339 if (dynamic_check != -1)
20341 rtx hot_label = gen_label_rtx ();
20342 jump_around_label = gen_label_rtx ();
20343 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20344 LEU, 0, counter_mode (count_exp), 1, hot_label);
20345 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20346 set_storage_via_libcall (dst, count_exp, val_exp, false);
20347 emit_jump (jump_around_label);
20348 emit_label (hot_label);
20351 /* Step 2: Alignment prologue. */
20353 /* Do the expensive promotion once we branched off the small blocks. */
20355 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20356 desired_align, align);
20357 gcc_assert (desired_align >= 1 && align >= 1);
20359 if (desired_align > align)
20361 if (align_bytes == 0)
20363 /* Except for the first move in epilogue, we no longer know
20364 constant offset in aliasing info. It doesn't seem worth
20365 the pain to maintain it for the first move, so throw away
20366 the info early. */
20367 dst = change_address (dst, BLKmode, destreg);
20368 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20373 /* If we know how many bytes need to be stored before dst is
20374 sufficiently aligned, maintain aliasing info accurately. */
20375 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20376 desired_align, align_bytes);
20377 count_exp = plus_constant (count_exp, -align_bytes);
20378 count -= align_bytes;
20380 if (need_zero_guard
20381 && (count < (unsigned HOST_WIDE_INT) size_needed
20382 || (align_bytes == 0
20383 && count < ((unsigned HOST_WIDE_INT) size_needed
20384 + desired_align - align))))
20386 /* It is possible that we copied enough so the main loop will not
20387 execute. */
20388 gcc_assert (size_needed > 1);
20389 if (label == NULL_RTX)
20390 label = gen_label_rtx ();
20391 emit_cmp_and_jump_insns (count_exp,
20392 GEN_INT (size_needed),
20393 LTU, 0, counter_mode (count_exp), 1, label);
20394 if (expected_size == -1
20395 || expected_size < (desired_align - align) / 2 + size_needed)
20396 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20398 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20401 if (label && size_needed == 1)
20403 emit_label (label);
20404 LABEL_NUSES (label) = 1;
20406 promoted_val = val_exp;
20407 epilogue_size_needed = 1;
20409 else if (label == NULL_RTX)
20410 epilogue_size_needed = size_needed;
20412 /* Step 3: Main loop. */
20418 gcc_unreachable ();
20420 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20421 count_exp, QImode, 1, expected_size);
20424 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20425 count_exp, Pmode, 1, expected_size);
20427 case unrolled_loop:
20428 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20429 count_exp, Pmode, 4, expected_size);
20431 case rep_prefix_8_byte:
20432 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20435 case rep_prefix_4_byte:
20436 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20439 case rep_prefix_1_byte:
20440 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20444 /* Properly adjust the offset of dst memory for aliasing. */
20445 if (CONST_INT_P (count_exp))
20446 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20447 (count / size_needed) * size_needed);
20449 dst = change_address (dst, BLKmode, destreg);
20451 /* Step 4: Epilogue to copy the remaining bytes. */
20455 /* When the main loop is done, COUNT_EXP might hold original count,
20456 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20457 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20458 bytes. Compensate if needed. */
20460 if (size_needed < epilogue_size_needed)
20463 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20464 GEN_INT (size_needed - 1), count_exp, 1,
20466 if (tmp != count_exp)
20467 emit_move_insn (count_exp, tmp);
20469 emit_label (label);
20470 LABEL_NUSES (label) = 1;
20473 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20475 if (force_loopy_epilogue)
20476 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20477 epilogue_size_needed);
20479 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20480 epilogue_size_needed);
20482 if (jump_around_label)
20483 emit_label (jump_around_label);
20487 /* Expand the appropriate insns for doing strlen if not just doing
20488 repnz; scasb
20490 out = result, initialized with the start address
20491 align_rtx = alignment of the address.
20492 scratch = scratch register, initialized with the start address when
20493 not aligned, otherwise undefined
20495 This is just the body. It needs the initializations mentioned above and
20496 some address computing at the end. These things are done in i386.md. */
20499 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20503 rtx align_2_label = NULL_RTX;
20504 rtx align_3_label = NULL_RTX;
20505 rtx align_4_label = gen_label_rtx ();
20506 rtx end_0_label = gen_label_rtx ();
20508 rtx tmpreg = gen_reg_rtx (SImode);
20509 rtx scratch = gen_reg_rtx (SImode);
20513 if (CONST_INT_P (align_rtx))
20514 align = INTVAL (align_rtx);
20516 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20518 /* Is there a known alignment and is it less than 4? */
20521 rtx scratch1 = gen_reg_rtx (Pmode);
20522 emit_move_insn (scratch1, out);
20523 /* Is there a known alignment and is it not 2? */
20526 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20527 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20529 /* Leave just the 3 lower bits. */
20530 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20531 NULL_RTX, 0, OPTAB_WIDEN);
20533 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20534 Pmode, 1, align_4_label);
20535 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20536 Pmode, 1, align_2_label);
20537 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20538 Pmode, 1, align_3_label);
20542 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20543 check if it is aligned to a 4-byte boundary. */
20545 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20546 NULL_RTX, 0, OPTAB_WIDEN);
20548 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20549 Pmode, 1, align_4_label);
20552 mem = change_address (src, QImode, out);
20554 /* Now compare the bytes. */
20556 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
20557 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20558 QImode, 1, end_0_label);
20560 /* Increment the address. */
20561 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20563 /* Not needed with an alignment of 2 */
20566 emit_label (align_2_label);
20568 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20571 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20573 emit_label (align_3_label);
20576 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20579 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20582 /* Generate loop to check 4 bytes at a time. It is not a good idea to
20583 align this loop. It only makes the code bigger and does not help
20584 to speed it up. */
20585 emit_label (align_4_label);
20587 mem = change_address (src, SImode, out);
20588 emit_move_insn (scratch, mem);
20589 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20591 /* This formula yields a nonzero result iff one of the bytes is zero.
20592 This saves three branches inside the loop and many cycles (see the illustrative sketch after this function). */
20594 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20595 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20596 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20597 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20598 gen_int_mode (0x80808080, SImode)));
20599 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20604 rtx reg = gen_reg_rtx (SImode);
20605 rtx reg2 = gen_reg_rtx (Pmode);
20606 emit_move_insn (reg, tmpreg);
20607 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20609 /* If zero is not in the first two bytes, move two bytes forward. */
20610 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20611 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20612 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20613 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20614 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20617 /* Emit lea manually to avoid clobbering of flags. */
20618 emit_insn (gen_rtx_SET (SImode, reg2,
20619 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20621 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20622 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20623 emit_insn (gen_rtx_SET (VOIDmode, out,
20624 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20630 rtx end_2_label = gen_label_rtx ();
20631 /* Is zero in the first two bytes? */
20633 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20634 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20635 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20636 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20637 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20639 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20640 JUMP_LABEL (tmp) = end_2_label;
20642 /* Not in the first two. Move two bytes forward. */
20643 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20644 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20646 emit_label (end_2_label);
20650 /* Avoid a branch in fixing up the final byte. */
20651 tmpreg = gen_lowpart (QImode, tmpreg);
20652 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20653 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20654 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20655 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20657 emit_label (end_0_label);
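/* Illustrative sketch (added for exposition): the zero-byte test emitted
   above corresponds to the plain C below; has_zero_byte is a name used
   only in this sketch. For the lowest zero byte of X the subtraction
   wraps that byte to 0xff while ~X keeps its top bit, so the AND is
   nonzero; when no byte is zero, every byte whose top bit the subtraction
   sets already had its top bit set in X and is cleared by ~X. */
#if 0
static int
has_zero_byte (unsigned int x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}
#endif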
20660 /* Expand strlen. */
20663 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20665 rtx addr, scratch1, scratch2, scratch3, scratch4;
20667 /* The generic case of the strlen expander is long. Avoid expanding it
20668 unless TARGET_INLINE_ALL_STRINGOPS. */
20670 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20671 && !TARGET_INLINE_ALL_STRINGOPS
20672 && !optimize_insn_for_size_p ()
20673 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20676 addr = force_reg (Pmode, XEXP (src, 0));
20677 scratch1 = gen_reg_rtx (Pmode);
20679 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20680 && !optimize_insn_for_size_p ())
20682 /* Well, it seems that some optimizer does not combine a call like
20683 foo(strlen(bar), strlen(bar));
20684 when the move and the subtraction are done here. It does calculate
20685 the length just once when these instructions are done inside of
20686 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20687 often used and I use one fewer register for the lifetime of
20688 output_strlen_unroll() this is better. */
20690 emit_move_insn (out, addr);
20692 ix86_expand_strlensi_unroll_1 (out, src, align);
20694 /* strlensi_unroll_1 returns the address of the zero at the end of
20695 the string, like memchr(), so compute the length by subtracting
20696 the start address. */
20697 emit_insn (ix86_gen_sub3 (out, out, addr));
20703 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20704 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20707 scratch2 = gen_reg_rtx (Pmode);
20708 scratch3 = gen_reg_rtx (Pmode);
20709 scratch4 = force_reg (Pmode, constm1_rtx);
20711 emit_move_insn (scratch3, addr);
20712 eoschar = force_reg (QImode, eoschar);
20714 src = replace_equiv_address_nv (src, scratch3);
20716 /* If .md starts supporting :P, this can be done in .md. */
20717 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20718 scratch4), UNSPEC_SCAS);
20719 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20720 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20721 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
20726 /* For a given symbol (function) construct code to compute the address of
20727 its PLT entry in the large x86-64 PIC model. */
20729 construct_plt_address (rtx symbol)
20731 rtx tmp = gen_reg_rtx (Pmode);
20732 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20734 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20735 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20737 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20738 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20743 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20745 rtx pop, int sibcall)
20747 rtx use = NULL, call;
20749 if (pop == const0_rtx)
20751 gcc_assert (!TARGET_64BIT || !pop);
20753 if (TARGET_MACHO && !TARGET_64BIT)
20756 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20757 fnaddr = machopic_indirect_call_target (fnaddr);
20762 /* Static functions and indirect calls don't need the pic register. */
20763 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20764 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20765 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20766 use_reg (&use, pic_offset_table_rtx);
20769 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20771 rtx al = gen_rtx_REG (QImode, AX_REG);
20772 emit_move_insn (al, callarg2);
20773 use_reg (&use, al);
20776 if (ix86_cmodel == CM_LARGE_PIC
20778 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20779 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20780 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20782 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20783 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20785 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20786 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20789 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20791 call = gen_rtx_SET (VOIDmode, retval, call);
20794 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20795 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20796 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20799 && ix86_cfun_abi () == MS_ABI
20800 && (!callarg2 || INTVAL (callarg2) != -2))
20802 /* We need to represent that SI and DI registers are clobbered
20803 by the call. */
20804 static int clobbered_registers[] = {
20805 XMM6_REG, XMM7_REG, XMM8_REG,
20806 XMM9_REG, XMM10_REG, XMM11_REG,
20807 XMM12_REG, XMM13_REG, XMM14_REG,
20808 XMM15_REG, SI_REG, DI_REG
20811 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20812 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20813 UNSPEC_MS_TO_SYSV_CALL);
20817 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20818 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20821 (SSE_REGNO_P (clobbered_registers[i])
20823 clobbered_registers[i]));
20825 call = gen_rtx_PARALLEL (VOIDmode,
20826 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20830 call = emit_call_insn (call);
20832 CALL_INSN_FUNCTION_USAGE (call) = use;
20838 /* Clear stack slot assignments remembered from previous functions.
20839 This is called from INIT_EXPANDERS once before RTL is emitted for each
20840 function. */
20842 static struct machine_function *
20843 ix86_init_machine_status (void)
20845 struct machine_function *f;
20847 f = ggc_alloc_cleared_machine_function ();
20848 f->use_fast_prologue_epilogue_nregs = -1;
20849 f->tls_descriptor_call_expanded_p = 0;
20850 f->call_abi = ix86_abi;
20855 /* Return a MEM corresponding to a stack slot with mode MODE.
20856 Allocate a new slot if necessary.
20858 The RTL for a function can have several slots available: N is
20859 which slot to use. */
20862 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20864 struct stack_local_entry *s;
20866 gcc_assert (n < MAX_386_STACK_LOCALS);
20868 /* Virtual slot is valid only before vregs are instantiated. */
20869 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20871 for (s = ix86_stack_locals; s; s = s->next)
20872 if (s->mode == mode && s->n == n)
20873 return copy_rtx (s->rtl);
20875 s = ggc_alloc_stack_local_entry ();
20878 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20880 s->next = ix86_stack_locals;
20881 ix86_stack_locals = s;
20885 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20887 static GTY(()) rtx ix86_tls_symbol;
20889 ix86_tls_get_addr (void)
20892 if (!ix86_tls_symbol)
20894 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20895 (TARGET_ANY_GNU_TLS
20897 ? "___tls_get_addr"
20898 : "__tls_get_addr");
20901 return ix86_tls_symbol;
20904 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20906 static GTY(()) rtx ix86_tls_module_base_symbol;
20908 ix86_tls_module_base (void)
20911 if (!ix86_tls_module_base_symbol)
20913 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20914 "_TLS_MODULE_BASE_");
20915 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20916 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20919 return ix86_tls_module_base_symbol;
20922 /* Calculate the length of the memory address in the instruction
20923 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20926 memory_address_length (rtx addr)
20928 struct ix86_address parts;
20929 rtx base, index, disp;
20933 if (GET_CODE (addr) == PRE_DEC
20934 || GET_CODE (addr) == POST_INC
20935 || GET_CODE (addr) == PRE_MODIFY
20936 || GET_CODE (addr) == POST_MODIFY)
20939 ok = ix86_decompose_address (addr, &parts);
20942 if (parts.base && GET_CODE (parts.base) == SUBREG)
20943 parts.base = SUBREG_REG (parts.base);
20944 if (parts.index && GET_CODE (parts.index) == SUBREG)
20945 parts.index = SUBREG_REG (parts.index);
20948 index = parts.index;
20952 /* Rule of thumb:
20953 - esp as the base always wants an index,
20954 - ebp as the base always wants a displacement,
20955 - r12 as the base always wants an index,
20956 - r13 as the base always wants a displacement. */
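/* Illustrative examples (added for exposition) of the rule of thumb above;
   lengths are as computed by this function, excluding modrm, opcode and
   prefix bytes:
     (%eax)        -> 0
     (%esp)        -> 1 (SIB byte)
     (%ebp)        -> 1 (disp8)
     4(%eax)       -> 1 (disp8)
     foo(%eax)     -> 4 (disp32)
     (%eax,%ebx,4) -> 1 (SIB byte) */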
20958 /* Register Indirect. */
20959 if (base && !index && !disp)
20961 /* esp (for its index) and ebp (for its displacement) need
20962 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20963 mode. */
20964 if (REG_P (addr)
20965 && (addr == arg_pointer_rtx
20966 || addr == frame_pointer_rtx
20967 || REGNO (addr) == SP_REG
20968 || REGNO (addr) == BP_REG
20969 || REGNO (addr) == R12_REG
20970 || REGNO (addr) == R13_REG))
20974 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20975 is not disp32, but disp32(%rip), so for disp32
20976 SIB byte is needed, unless print_operand_address
20977 optimizes it into disp32(%rip) or (%rip) is implied
20978 by UNSPEC. */
20979 else if (disp && !base && !index)
20986 if (GET_CODE (disp) == CONST)
20987 symbol = XEXP (disp, 0);
20988 if (GET_CODE (symbol) == PLUS
20989 && CONST_INT_P (XEXP (symbol, 1)))
20990 symbol = XEXP (symbol, 0);
20992 if (GET_CODE (symbol) != LABEL_REF
20993 && (GET_CODE (symbol) != SYMBOL_REF
20994 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20995 && (GET_CODE (symbol) != UNSPEC
20996 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20997 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21004 /* Find the length of the displacement constant. */
21007 if (base && satisfies_constraint_K (disp))
21012 /* ebp always wants a displacement. Similarly r13. */
21013 else if (base && REG_P (base)
21014 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21017 /* An index requires the two-byte modrm form.... */
21019 /* ...like esp (or r12), which always wants an index. */
21020 || base == arg_pointer_rtx
21021 || base == frame_pointer_rtx
21022 || (base && REG_P (base)
21023 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21040 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21041 is set, expect that the insn has an 8-bit immediate alternative. */
21043 ix86_attr_length_immediate_default (rtx insn, int shortform)
21047 extract_insn_cached (insn);
21048 for (i = recog_data.n_operands - 1; i >= 0; --i)
21049 if (CONSTANT_P (recog_data.operand[i]))
21051 enum attr_mode mode = get_attr_mode (insn);
21054 if (shortform && CONST_INT_P (recog_data.operand[i]))
21056 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21063 ival = trunc_int_for_mode (ival, HImode);
21066 ival = trunc_int_for_mode (ival, SImode);
21071 if (IN_RANGE (ival, -128, 127))
21088 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21093 fatal_insn ("unknown insn mode", insn);
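/* Example (added for exposition): for `addl $3, %eax' the 8-bit immediate
   alternative applies, since 3 fits in -128..127, and the immediate
   contributes 1 byte; for `addl $1000, %eax' it does not, and the
   immediate is encoded as a 4-byte imm32. */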
21098 /* Compute default value for "length_address" attribute. */
21100 ix86_attr_length_address_default (rtx insn)
21104 if (get_attr_type (insn) == TYPE_LEA)
21106 rtx set = PATTERN (insn), addr;
21108 if (GET_CODE (set) == PARALLEL)
21109 set = XVECEXP (set, 0, 0);
21111 gcc_assert (GET_CODE (set) == SET);
21113 addr = SET_SRC (set);
21114 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21116 if (GET_CODE (addr) == ZERO_EXTEND)
21117 addr = XEXP (addr, 0);
21118 if (GET_CODE (addr) == SUBREG)
21119 addr = SUBREG_REG (addr);
21122 return memory_address_length (addr);
21125 extract_insn_cached (insn);
21126 for (i = recog_data.n_operands - 1; i >= 0; --i)
21127 if (MEM_P (recog_data.operand[i]))
21129 constrain_operands_cached (reload_completed);
21130 if (which_alternative != -1)
21132 const char *constraints = recog_data.constraints[i];
21133 int alt = which_alternative;
21135 while (*constraints == '=' || *constraints == '+')
21138 while (*constraints++ != ',')
21140 /* Skip ignored operands. */
21141 if (*constraints == 'X')
21144 return memory_address_length (XEXP (recog_data.operand[i], 0));
21149 /* Compute default value for "length_vex" attribute. It includes
21150 the 2- or 3-byte VEX prefix and 1 opcode byte. */
21153 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21158 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
21159 requires the 3-byte VEX prefix. */
21160 if (!has_0f_opcode || has_vex_w)
21163 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21167 extract_insn_cached (insn);
21169 for (i = recog_data.n_operands - 1; i >= 0; --i)
21170 if (REG_P (recog_data.operand[i]))
21172 /* The REX.W bit requires the 3-byte VEX prefix. */
21173 if (GET_MODE (recog_data.operand[i]) == DImode
21174 && GENERAL_REG_P (recog_data.operand[i]))
21179 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
21180 if (MEM_P (recog_data.operand[i])
21181 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
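/* Example (added for exposition): an AVX insn in the 0f opcode map that
   needs none of the REX.W/X/B bits uses the 2-byte prefix (c5 xx), so the
   value computed here is 2 + 1 = 3; REX.W, REX.X, REX.B or a 0f38/0f3a
   opcode map forces the 3-byte prefix (c4 xx xx), giving 3 + 1 = 4. */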
21188 /* Return the maximum number of instructions a CPU can issue. */
21191 ix86_issue_rate (void)
21195 case PROCESSOR_PENTIUM:
21196 case PROCESSOR_ATOM:
21200 case PROCESSOR_PENTIUMPRO:
21201 case PROCESSOR_PENTIUM4:
21202 case PROCESSOR_ATHLON:
21204 case PROCESSOR_AMDFAM10:
21205 case PROCESSOR_NOCONA:
21206 case PROCESSOR_GENERIC32:
21207 case PROCESSOR_GENERIC64:
21208 case PROCESSOR_BDVER1:
21211 case PROCESSOR_CORE2:
21219 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21220 by DEP_INSN and nothing else set by DEP_INSN. */
21223 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21227 /* Simplify the test for uninteresting insns. */
21228 if (insn_type != TYPE_SETCC
21229 && insn_type != TYPE_ICMOV
21230 && insn_type != TYPE_FCMOV
21231 && insn_type != TYPE_IBR)
21234 if ((set = single_set (dep_insn)) != 0)
21236 set = SET_DEST (set);
21239 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21240 && XVECLEN (PATTERN (dep_insn), 0) == 2
21241 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21242 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21244 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21245 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21250 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21253 /* This test is true if the dependent insn reads the flags but
21254 not any other potentially set register. */
21255 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21258 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21264 /* Return true iff USE_INSN has a memory address with operands set by
21265 SET_INSN. */
21268 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21271 extract_insn_cached (use_insn);
21272 for (i = recog_data.n_operands - 1; i >= 0; --i)
21273 if (MEM_P (recog_data.operand[i]))
21275 rtx addr = XEXP (recog_data.operand[i], 0);
21276 return modified_in_p (addr, set_insn) != 0;
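/* Illustration (added for exposition): on PROCESSOR_PENTIUM the pair

       addl $4, %eax
       movl (%eax), %ebx

   hits an address generation interlock, because the load's address
   register is written by the immediately preceding insn; ix86_adjust_cost
   below charges an extra cycle of latency for such dependences. */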
21282 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21284 enum attr_type insn_type, dep_insn_type;
21285 enum attr_memory memory;
21287 int dep_insn_code_number;
21289 /* Anti and output dependencies have zero cost on all CPUs. */
21290 if (REG_NOTE_KIND (link) != 0)
21293 dep_insn_code_number = recog_memoized (dep_insn);
21295 /* If we can't recognize the insns, we can't really do anything. */
21296 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21299 insn_type = get_attr_type (insn);
21300 dep_insn_type = get_attr_type (dep_insn);
21304 case PROCESSOR_PENTIUM:
21305 /* Address Generation Interlock adds a cycle of latency. */
21306 if (insn_type == TYPE_LEA)
21308 rtx addr = PATTERN (insn);
21310 if (GET_CODE (addr) == PARALLEL)
21311 addr = XVECEXP (addr, 0, 0);
21313 gcc_assert (GET_CODE (addr) == SET);
21315 addr = SET_SRC (addr);
21316 if (modified_in_p (addr, dep_insn))
21319 else if (ix86_agi_dependent (dep_insn, insn))
21322 /* ??? Compares pair with jump/setcc. */
21323 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21326 /* Floating point stores require the value to be ready one cycle earlier. */
21327 if (insn_type == TYPE_FMOV
21328 && get_attr_memory (insn) == MEMORY_STORE
21329 && !ix86_agi_dependent (dep_insn, insn))
21333 case PROCESSOR_PENTIUMPRO:
21334 memory = get_attr_memory (insn);
21336 /* INT->FP conversion is expensive. */
21337 if (get_attr_fp_int_src (dep_insn))
21340 /* There is one cycle extra latency between an FP op and a store. */
21341 if (insn_type == TYPE_FMOV
21342 && (set = single_set (dep_insn)) != NULL_RTX
21343 && (set2 = single_set (insn)) != NULL_RTX
21344 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21345 && MEM_P (SET_DEST (set2)))
21348 /* Show the ability of the reorder buffer to hide the latency of a load by
21349 executing it in parallel with the previous instruction when the
21350 previous instruction is not needed to compute the address. */
21351 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21352 && !ix86_agi_dependent (dep_insn, insn))
21354 /* Claim moves to take one cycle, as the core can issue one load
21355 at a time and the next load can start a cycle later. */
21356 if (dep_insn_type == TYPE_IMOV
21357 || dep_insn_type == TYPE_FMOV)
21365 memory = get_attr_memory (insn);
21367 /* The esp dependency is resolved before the instruction is really
21368 finished. */
21369 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21370 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21373 /* INT->FP conversion is expensive. */
21374 if (get_attr_fp_int_src (dep_insn))
21377 /* Show the ability of the reorder buffer to hide the latency of a load by
21378 executing it in parallel with the previous instruction when the
21379 previous instruction is not needed to compute the address. */
21380 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21381 && !ix86_agi_dependent (dep_insn, insn))
21383 /* Claim moves to take one cycle, as the core can issue one load
21384 at a time and the next load can start a cycle later. */
21385 if (dep_insn_type == TYPE_IMOV
21386 || dep_insn_type == TYPE_FMOV)
21395 case PROCESSOR_ATHLON:
21397 case PROCESSOR_AMDFAM10:
21398 case PROCESSOR_BDVER1:
21399 case PROCESSOR_ATOM:
21400 case PROCESSOR_GENERIC32:
21401 case PROCESSOR_GENERIC64:
21402 memory = get_attr_memory (insn);
21404 /* Show the ability of the reorder buffer to hide the latency of a load by
21405 executing it in parallel with the previous instruction when the
21406 previous instruction is not needed to compute the address. */
21407 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21408 && !ix86_agi_dependent (dep_insn, insn))
21410 enum attr_unit unit = get_attr_unit (insn);
21413 /* Because of the difference between the length of integer and
21414 floating unit pipeline preparation stages, the memory operands
21415 for floating point are cheaper.
21417 ??? For Athlon the difference is most probably 2. */
21418 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21421 loadcost = TARGET_ATHLON ? 2 : 0;
21423 if (cost >= loadcost)
21436 /* How many alternative schedules to try. This should be as wide as the
21437 scheduling freedom in the DFA, but no wider. Making this value too
21438 large results in extra work for the scheduler. */
21441 ia32_multipass_dfa_lookahead (void)
21445 case PROCESSOR_PENTIUM:
21448 case PROCESSOR_PENTIUMPRO:
21458 /* Compute the alignment given to a constant that is being placed in memory.
21459 EXP is the constant and ALIGN is the alignment that the object would
21460 ordinarily have.
21461 The value of this function is used instead of that alignment to align
21462 the object. */
21465 ix86_constant_alignment (tree exp, int align)
21467 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21468 || TREE_CODE (exp) == INTEGER_CST)
21470 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21472 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21475 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21476 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21477 return BITS_PER_WORD;
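/* Example (added for exposition): with the rules above, a DFmode constant
   placed in .rodata is aligned to 64 bits even on 32-bit targets whose
   default alignment for double is smaller, and a string constant of
   length 31 or more is aligned to a word boundary when not optimizing
   for size. */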
21482 /* Compute the alignment for a static variable.
21483 TYPE is the data type, and ALIGN is the alignment that
21484 the object would ordinarily have. The value of this function is used
21485 instead of that alignment to align the object. */
21488 ix86_data_alignment (tree type, int align)
21490 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21492 if (AGGREGATE_TYPE_P (type)
21493 && TYPE_SIZE (type)
21494 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21495 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21496 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21497 && align < max_align)
21500 /* x86-64 ABI requires arrays of length at least 16 bytes to be aligned
21501 to a 16-byte boundary. */
21504 if (AGGREGATE_TYPE_P (type)
21505 && TYPE_SIZE (type)
21506 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21507 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21508 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21512 if (TREE_CODE (type) == ARRAY_TYPE)
21514 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21516 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21519 else if (TREE_CODE (type) == COMPLEX_TYPE)
21522 if (TYPE_MODE (type) == DCmode && align < 64)
21524 if ((TYPE_MODE (type) == XCmode
21525 || TYPE_MODE (type) == TCmode) && align < 128)
21528 else if ((TREE_CODE (type) == RECORD_TYPE
21529 || TREE_CODE (type) == UNION_TYPE
21530 || TREE_CODE (type) == QUAL_UNION_TYPE)
21531 && TYPE_FIELDS (type))
21533 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21535 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21538 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21539 || TREE_CODE (type) == INTEGER_TYPE)
21541 if (TYPE_MODE (type) == DFmode && align < 64)
21543 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21550 /* Compute the alignment for a local variable or a stack slot. EXP is
21551 the data type or decl itself, MODE is the widest mode available and
21552 ALIGN is the alignment that the object would ordinarily have. The
21553 value of this macro is used instead of that alignment to align the
21554 object. */
21557 ix86_local_alignment (tree exp, enum machine_mode mode,
21558 unsigned int align)
21562 if (exp && DECL_P (exp))
21564 type = TREE_TYPE (exp);
21573 /* Don't do dynamic stack realignment for long long objects with
21574 -mpreferred-stack-boundary=2. */
21577 && ix86_preferred_stack_boundary < 64
21578 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21579 && (!type || !TYPE_USER_ALIGN (type))
21580 && (!decl || !DECL_USER_ALIGN (decl)))
21583 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21584 register in MODE. We will return the largest alignment of XF
21585 and DF. */
21588 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21589 align = GET_MODE_ALIGNMENT (DFmode);
21593 /* x86-64 ABI requires arrays of length at least 16 bytes to be aligned
21594 to a 16-byte boundary. Exact wording is:
21596 An array uses the same alignment as its elements, except that a local or
21597 global array variable of length at least 16 bytes or
21598 a C99 variable-length array variable always has alignment of at least 16 bytes.
21600 This was added to allow use of aligned SSE instructions on arrays. This
21601 rule is meant for static storage (where the compiler cannot do the analysis
21602 by itself). We follow it for automatic variables only when convenient.
21603 We fully control everything in the function being compiled, and functions
21604 from other units cannot rely on the alignment.
21606 Exclude the va_list type. It is the common case of a local array where
21607 we cannot benefit from the alignment. */
21608 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21611 if (AGGREGATE_TYPE_P (type)
21612 && (TYPE_MAIN_VARIANT (type)
21613 != TYPE_MAIN_VARIANT (va_list_type_node))
21614 && TYPE_SIZE (type)
21615 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21616 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21617 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21620 if (TREE_CODE (type) == ARRAY_TYPE)
21622 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21624 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21627 else if (TREE_CODE (type) == COMPLEX_TYPE)
21629 if (TYPE_MODE (type) == DCmode && align < 64)
21631 if ((TYPE_MODE (type) == XCmode
21632 || TYPE_MODE (type) == TCmode) && align < 128)
21635 else if ((TREE_CODE (type) == RECORD_TYPE
21636 || TREE_CODE (type) == UNION_TYPE
21637 || TREE_CODE (type) == QUAL_UNION_TYPE)
21638 && TYPE_FIELDS (type))
21640 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21642 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21645 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21646 || TREE_CODE (type) == INTEGER_TYPE)
21649 if (TYPE_MODE (type) == DFmode && align < 64)
21651 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21657 /* Compute the minimum required alignment for dynamic stack realignment
21658 purposes for a local variable, parameter or a stack slot. EXP is
21659 the data type or decl itself, MODE is its mode and ALIGN is the
21660 alignment that the object would ordinarily have. */
21663 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21664 unsigned int align)
21668 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21671 if (exp && DECL_P (exp))
21673 type = TREE_TYPE (exp);
21682 /* Don't do dynamic stack realignment for long long objects with
21683 -mpreferred-stack-boundary=2. */
21684 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21685 && (!type || !TYPE_USER_ALIGN (type))
21686 && (!decl || !DECL_USER_ALIGN (decl)))
21692 /* Find a location for the static chain incoming to a nested function.
21693 This is a register, unless all free registers are used by arguments. */
21696 ix86_static_chain (const_tree fndecl, bool incoming_p)
21700 if (!DECL_STATIC_CHAIN (fndecl))
21705 /* We always use R10 in 64-bit mode. */
21711 /* By default in 32-bit mode we use ECX to pass the static chain. */
21714 fntype = TREE_TYPE (fndecl);
21715 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21717 /* Fastcall functions use ecx/edx for arguments, which leaves
21718 us with EAX for the static chain. */
21721 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21723 /* Thiscall functions use ecx for arguments, which leaves
21724 us with EAX for the static chain. */
21727 else if (ix86_function_regparm (fntype, fndecl) == 3)
21729 /* For regparm 3, we have no free call-clobbered registers in
21730 which to store the static chain. In order to implement this,
21731 we have the trampoline push the static chain to the stack.
21732 However, we can't push a value below the return address when
21733 we call the nested function directly, so we have to use an
21734 alternate entry point. For this we use ESI, and have the
21735 alternate entry point push ESI, so that things appear the
21736 same once we're executing the nested function. */
21739 if (fndecl == current_function_decl)
21740 ix86_static_chain_on_stack = true;
21741 return gen_frame_mem (SImode,
21742 plus_constant (arg_pointer_rtx, -8));
21748 return gen_rtx_REG (Pmode, regno);
21751 /* Emit RTL insns to initialize the variable parts of a trampoline.
21752 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21753 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21754 to be passed to the target function. */
21757 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21761 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21768 /* Depending on the static chain location, either load a register
21769 with a constant, or push the constant to the stack. All of the
21770 instructions are the same size. */
21771 chain = ix86_static_chain (fndecl, true);
21774 if (REGNO (chain) == CX_REG)
21776 else if (REGNO (chain) == AX_REG)
21779 gcc_unreachable ();
21784 mem = adjust_address (m_tramp, QImode, 0);
21785 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21787 mem = adjust_address (m_tramp, SImode, 1);
21788 emit_move_insn (mem, chain_value);
21790 /* Compute offset from the end of the jmp to the target function.
21791 In the case in which the trampoline stores the static chain on
21792 the stack, we need to skip the first insn which pushes the
21793 (call-saved) register static chain; this push is 1 byte. */
21794 disp = expand_binop (SImode, sub_optab, fnaddr,
21795 plus_constant (XEXP (m_tramp, 0),
21796 MEM_P (chain) ? 9 : 10),
21797 NULL_RTX, 1, OPTAB_DIRECT);
21799 mem = adjust_address (m_tramp, QImode, 5);
21800 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21802 mem = adjust_address (m_tramp, SImode, 6);
21803 emit_move_insn (mem, disp);
21809 /* Load the function address into r11. Try to load the address using
21810 the shorter movl instead of movabs. We may want to support
21811 movq for kernel mode, but the kernel does not use trampolines at
21812 the moment. */
21813 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21815 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21817 mem = adjust_address (m_tramp, HImode, offset);
21818 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21820 mem = adjust_address (m_tramp, SImode, offset + 2);
21821 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21826 mem = adjust_address (m_tramp, HImode, offset);
21827 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21829 mem = adjust_address (m_tramp, DImode, offset + 2);
21830 emit_move_insn (mem, fnaddr);
21834 /* Load static chain using movabs to r10. */
21835 mem = adjust_address (m_tramp, HImode, offset);
21836 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21838 mem = adjust_address (m_tramp, DImode, offset + 2);
21839 emit_move_insn (mem, chain_value);
21842 /* Jump to r11; the last (unused) byte is a nop, only there to
21843 pad the write out to a single 32-bit store. */
21844 mem = adjust_address (m_tramp, SImode, offset);
21845 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21848 gcc_assert (offset <= TRAMPOLINE_SIZE);
21851 #ifdef ENABLE_EXECUTE_STACK
21852 #ifdef CHECK_EXECUTE_STACK_ENABLED
21853 if (CHECK_EXECUTE_STACK_ENABLED)
21855 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21856 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
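/* Summary (added for exposition, following the stores emitted above) of
   the trampoline byte layout:

   32-bit, static chain in a register (or pushed, for the regparm-3 case):
     b9/b8/68 <chain32>   movl $chain, %ecx/%eax  or  pushl $chain
     e9 <rel32>           jmp <target>

   64-bit:
     41 bb <imm32> / 49 bb <imm64>   movl/movabs $fnaddr, %r11
     49 ba <imm64>                   movabs $chain, %r10
     49 ff e3 90                     jmpq *%r11; nop (pad) */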
21860 /* The following file contains several enumerations and data structures
21861 built from the definitions in i386-builtin-types.def. */
21863 #include "i386-builtin-types.inc"
21865 /* Table for the ix86 builtin non-function types. */
21866 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21868 /* Retrieve an element from the above table, building some of
21869 the types lazily. */
21872 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21874 unsigned int index;
21877 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21879 type = ix86_builtin_type_tab[(int) tcode];
21883 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21884 if (tcode <= IX86_BT_LAST_VECT)
21886 enum machine_mode mode;
21888 index = tcode - IX86_BT_LAST_PRIM - 1;
21889 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21890 mode = ix86_builtin_type_vect_mode[index];
21892 type = build_vector_type_for_mode (itype, mode);
21898 index = tcode - IX86_BT_LAST_VECT - 1;
21899 if (tcode <= IX86_BT_LAST_PTR)
21900 quals = TYPE_UNQUALIFIED;
21902 quals = TYPE_QUAL_CONST;
21904 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21905 if (quals != TYPE_UNQUALIFIED)
21906 itype = build_qualified_type (itype, quals);
21908 type = build_pointer_type (itype);
21911 ix86_builtin_type_tab[(int) tcode] = type;
21915 /* Table for the ix86 builtin function types. */
21916 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21918 /* Retrieve an element from the above table, building some of
21919 the types lazily. */
21922 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21926 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21928 type = ix86_builtin_func_type_tab[(int) tcode];
21932 if (tcode <= IX86_BT_LAST_FUNC)
21934 unsigned start = ix86_builtin_func_start[(int) tcode];
21935 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21936 tree rtype, atype, args = void_list_node;
21939 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21940 for (i = after - 1; i > start; --i)
21942 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21943 args = tree_cons (NULL, atype, args);
21946 type = build_function_type (rtype, args);
21950 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21951 enum ix86_builtin_func_type icode;
21953 icode = ix86_builtin_func_alias_base[index];
21954 type = ix86_get_builtin_func_type (icode);
21957 ix86_builtin_func_type_tab[(int) tcode] = type;
21962 /* Codes for all the SSE/MMX builtins. */
21963 enum ix86_builtins
21964 {
21965 IX86_BUILTIN_ADDPS,
21966 IX86_BUILTIN_ADDSS,
21967 IX86_BUILTIN_DIVPS,
21968 IX86_BUILTIN_DIVSS,
21969 IX86_BUILTIN_MULPS,
21970 IX86_BUILTIN_MULSS,
21971 IX86_BUILTIN_SUBPS,
21972 IX86_BUILTIN_SUBSS,
21974 IX86_BUILTIN_CMPEQPS,
21975 IX86_BUILTIN_CMPLTPS,
21976 IX86_BUILTIN_CMPLEPS,
21977 IX86_BUILTIN_CMPGTPS,
21978 IX86_BUILTIN_CMPGEPS,
21979 IX86_BUILTIN_CMPNEQPS,
21980 IX86_BUILTIN_CMPNLTPS,
21981 IX86_BUILTIN_CMPNLEPS,
21982 IX86_BUILTIN_CMPNGTPS,
21983 IX86_BUILTIN_CMPNGEPS,
21984 IX86_BUILTIN_CMPORDPS,
21985 IX86_BUILTIN_CMPUNORDPS,
21986 IX86_BUILTIN_CMPEQSS,
21987 IX86_BUILTIN_CMPLTSS,
21988 IX86_BUILTIN_CMPLESS,
21989 IX86_BUILTIN_CMPNEQSS,
21990 IX86_BUILTIN_CMPNLTSS,
21991 IX86_BUILTIN_CMPNLESS,
21992 IX86_BUILTIN_CMPNGTSS,
21993 IX86_BUILTIN_CMPNGESS,
21994 IX86_BUILTIN_CMPORDSS,
21995 IX86_BUILTIN_CMPUNORDSS,
21997 IX86_BUILTIN_COMIEQSS,
21998 IX86_BUILTIN_COMILTSS,
21999 IX86_BUILTIN_COMILESS,
22000 IX86_BUILTIN_COMIGTSS,
22001 IX86_BUILTIN_COMIGESS,
22002 IX86_BUILTIN_COMINEQSS,
22003 IX86_BUILTIN_UCOMIEQSS,
22004 IX86_BUILTIN_UCOMILTSS,
22005 IX86_BUILTIN_UCOMILESS,
22006 IX86_BUILTIN_UCOMIGTSS,
22007 IX86_BUILTIN_UCOMIGESS,
22008 IX86_BUILTIN_UCOMINEQSS,
22010 IX86_BUILTIN_CVTPI2PS,
22011 IX86_BUILTIN_CVTPS2PI,
22012 IX86_BUILTIN_CVTSI2SS,
22013 IX86_BUILTIN_CVTSI642SS,
22014 IX86_BUILTIN_CVTSS2SI,
22015 IX86_BUILTIN_CVTSS2SI64,
22016 IX86_BUILTIN_CVTTPS2PI,
22017 IX86_BUILTIN_CVTTSS2SI,
22018 IX86_BUILTIN_CVTTSS2SI64,
22020 IX86_BUILTIN_MAXPS,
22021 IX86_BUILTIN_MAXSS,
22022 IX86_BUILTIN_MINPS,
22023 IX86_BUILTIN_MINSS,
22025 IX86_BUILTIN_LOADUPS,
22026 IX86_BUILTIN_STOREUPS,
22027 IX86_BUILTIN_MOVSS,
22029 IX86_BUILTIN_MOVHLPS,
22030 IX86_BUILTIN_MOVLHPS,
22031 IX86_BUILTIN_LOADHPS,
22032 IX86_BUILTIN_LOADLPS,
22033 IX86_BUILTIN_STOREHPS,
22034 IX86_BUILTIN_STORELPS,
22036 IX86_BUILTIN_MASKMOVQ,
22037 IX86_BUILTIN_MOVMSKPS,
22038 IX86_BUILTIN_PMOVMSKB,
22040 IX86_BUILTIN_MOVNTPS,
22041 IX86_BUILTIN_MOVNTQ,
22043 IX86_BUILTIN_LOADDQU,
22044 IX86_BUILTIN_STOREDQU,
22046 IX86_BUILTIN_PACKSSWB,
22047 IX86_BUILTIN_PACKSSDW,
22048 IX86_BUILTIN_PACKUSWB,
22050 IX86_BUILTIN_PADDB,
22051 IX86_BUILTIN_PADDW,
22052 IX86_BUILTIN_PADDD,
22053 IX86_BUILTIN_PADDQ,
22054 IX86_BUILTIN_PADDSB,
22055 IX86_BUILTIN_PADDSW,
22056 IX86_BUILTIN_PADDUSB,
22057 IX86_BUILTIN_PADDUSW,
22058 IX86_BUILTIN_PSUBB,
22059 IX86_BUILTIN_PSUBW,
22060 IX86_BUILTIN_PSUBD,
22061 IX86_BUILTIN_PSUBQ,
22062 IX86_BUILTIN_PSUBSB,
22063 IX86_BUILTIN_PSUBSW,
22064 IX86_BUILTIN_PSUBUSB,
22065 IX86_BUILTIN_PSUBUSW,
22067 IX86_BUILTIN_PAND,
22068 IX86_BUILTIN_PANDN,
22070 IX86_BUILTIN_POR,
22071 IX86_BUILTIN_PXOR,
22072 IX86_BUILTIN_PAVGB,
22073 IX86_BUILTIN_PAVGW,
22075 IX86_BUILTIN_PCMPEQB,
22076 IX86_BUILTIN_PCMPEQW,
22077 IX86_BUILTIN_PCMPEQD,
22078 IX86_BUILTIN_PCMPGTB,
22079 IX86_BUILTIN_PCMPGTW,
22080 IX86_BUILTIN_PCMPGTD,
22082 IX86_BUILTIN_PMADDWD,
22084 IX86_BUILTIN_PMAXSW,
22085 IX86_BUILTIN_PMAXUB,
22086 IX86_BUILTIN_PMINSW,
22087 IX86_BUILTIN_PMINUB,
22089 IX86_BUILTIN_PMULHUW,
22090 IX86_BUILTIN_PMULHW,
22091 IX86_BUILTIN_PMULLW,
22093 IX86_BUILTIN_PSADBW,
22094 IX86_BUILTIN_PSHUFW,
22096 IX86_BUILTIN_PSLLW,
22097 IX86_BUILTIN_PSLLD,
22098 IX86_BUILTIN_PSLLQ,
22099 IX86_BUILTIN_PSRAW,
22100 IX86_BUILTIN_PSRAD,
22101 IX86_BUILTIN_PSRLW,
22102 IX86_BUILTIN_PSRLD,
22103 IX86_BUILTIN_PSRLQ,
22104 IX86_BUILTIN_PSLLWI,
22105 IX86_BUILTIN_PSLLDI,
22106 IX86_BUILTIN_PSLLQI,
22107 IX86_BUILTIN_PSRAWI,
22108 IX86_BUILTIN_PSRADI,
22109 IX86_BUILTIN_PSRLWI,
22110 IX86_BUILTIN_PSRLDI,
22111 IX86_BUILTIN_PSRLQI,
22113 IX86_BUILTIN_PUNPCKHBW,
22114 IX86_BUILTIN_PUNPCKHWD,
22115 IX86_BUILTIN_PUNPCKHDQ,
22116 IX86_BUILTIN_PUNPCKLBW,
22117 IX86_BUILTIN_PUNPCKLWD,
22118 IX86_BUILTIN_PUNPCKLDQ,
22120 IX86_BUILTIN_SHUFPS,
22122 IX86_BUILTIN_RCPPS,
22123 IX86_BUILTIN_RCPSS,
22124 IX86_BUILTIN_RSQRTPS,
22125 IX86_BUILTIN_RSQRTPS_NR,
22126 IX86_BUILTIN_RSQRTSS,
22127 IX86_BUILTIN_RSQRTF,
22128 IX86_BUILTIN_SQRTPS,
22129 IX86_BUILTIN_SQRTPS_NR,
22130 IX86_BUILTIN_SQRTSS,
22132 IX86_BUILTIN_UNPCKHPS,
22133 IX86_BUILTIN_UNPCKLPS,
22135 IX86_BUILTIN_ANDPS,
22136 IX86_BUILTIN_ANDNPS,
22137 IX86_BUILTIN_ORPS,
22138 IX86_BUILTIN_XORPS,
22140 IX86_BUILTIN_EMMS,
22141 IX86_BUILTIN_LDMXCSR,
22142 IX86_BUILTIN_STMXCSR,
22143 IX86_BUILTIN_SFENCE,
22145 /* 3DNow! Original */
22146 IX86_BUILTIN_FEMMS,
22147 IX86_BUILTIN_PAVGUSB,
22148 IX86_BUILTIN_PF2ID,
22149 IX86_BUILTIN_PFACC,
22150 IX86_BUILTIN_PFADD,
22151 IX86_BUILTIN_PFCMPEQ,
22152 IX86_BUILTIN_PFCMPGE,
22153 IX86_BUILTIN_PFCMPGT,
22154 IX86_BUILTIN_PFMAX,
22155 IX86_BUILTIN_PFMIN,
22156 IX86_BUILTIN_PFMUL,
22157 IX86_BUILTIN_PFRCP,
22158 IX86_BUILTIN_PFRCPIT1,
22159 IX86_BUILTIN_PFRCPIT2,
22160 IX86_BUILTIN_PFRSQIT1,
22161 IX86_BUILTIN_PFRSQRT,
22162 IX86_BUILTIN_PFSUB,
22163 IX86_BUILTIN_PFSUBR,
22164 IX86_BUILTIN_PI2FD,
22165 IX86_BUILTIN_PMULHRW,
22167 /* 3DNow! Athlon Extensions */
22168 IX86_BUILTIN_PF2IW,
22169 IX86_BUILTIN_PFNACC,
22170 IX86_BUILTIN_PFPNACC,
22171 IX86_BUILTIN_PI2FW,
22172 IX86_BUILTIN_PSWAPDSI,
22173 IX86_BUILTIN_PSWAPDSF,
22175 /* SSE2 */
22176 IX86_BUILTIN_ADDPD,
22177 IX86_BUILTIN_ADDSD,
22178 IX86_BUILTIN_DIVPD,
22179 IX86_BUILTIN_DIVSD,
22180 IX86_BUILTIN_MULPD,
22181 IX86_BUILTIN_MULSD,
22182 IX86_BUILTIN_SUBPD,
22183 IX86_BUILTIN_SUBSD,
22185 IX86_BUILTIN_CMPEQPD,
22186 IX86_BUILTIN_CMPLTPD,
22187 IX86_BUILTIN_CMPLEPD,
22188 IX86_BUILTIN_CMPGTPD,
22189 IX86_BUILTIN_CMPGEPD,
22190 IX86_BUILTIN_CMPNEQPD,
22191 IX86_BUILTIN_CMPNLTPD,
22192 IX86_BUILTIN_CMPNLEPD,
22193 IX86_BUILTIN_CMPNGTPD,
22194 IX86_BUILTIN_CMPNGEPD,
22195 IX86_BUILTIN_CMPORDPD,
22196 IX86_BUILTIN_CMPUNORDPD,
22197 IX86_BUILTIN_CMPEQSD,
22198 IX86_BUILTIN_CMPLTSD,
22199 IX86_BUILTIN_CMPLESD,
22200 IX86_BUILTIN_CMPNEQSD,
22201 IX86_BUILTIN_CMPNLTSD,
22202 IX86_BUILTIN_CMPNLESD,
22203 IX86_BUILTIN_CMPORDSD,
22204 IX86_BUILTIN_CMPUNORDSD,
22206 IX86_BUILTIN_COMIEQSD,
22207 IX86_BUILTIN_COMILTSD,
22208 IX86_BUILTIN_COMILESD,
22209 IX86_BUILTIN_COMIGTSD,
22210 IX86_BUILTIN_COMIGESD,
22211 IX86_BUILTIN_COMINEQSD,
22212 IX86_BUILTIN_UCOMIEQSD,
22213 IX86_BUILTIN_UCOMILTSD,
22214 IX86_BUILTIN_UCOMILESD,
22215 IX86_BUILTIN_UCOMIGTSD,
22216 IX86_BUILTIN_UCOMIGESD,
22217 IX86_BUILTIN_UCOMINEQSD,
22219 IX86_BUILTIN_MAXPD,
22220 IX86_BUILTIN_MAXSD,
22221 IX86_BUILTIN_MINPD,
22222 IX86_BUILTIN_MINSD,
22224 IX86_BUILTIN_ANDPD,
22225 IX86_BUILTIN_ANDNPD,
22227 IX86_BUILTIN_XORPD,
22229 IX86_BUILTIN_SQRTPD,
22230 IX86_BUILTIN_SQRTSD,
22232 IX86_BUILTIN_UNPCKHPD,
22233 IX86_BUILTIN_UNPCKLPD,
22235 IX86_BUILTIN_SHUFPD,
22237 IX86_BUILTIN_LOADUPD,
22238 IX86_BUILTIN_STOREUPD,
22239 IX86_BUILTIN_MOVSD,
22241 IX86_BUILTIN_LOADHPD,
22242 IX86_BUILTIN_LOADLPD,
22244 IX86_BUILTIN_CVTDQ2PD,
22245 IX86_BUILTIN_CVTDQ2PS,
22247 IX86_BUILTIN_CVTPD2DQ,
22248 IX86_BUILTIN_CVTPD2PI,
22249 IX86_BUILTIN_CVTPD2PS,
22250 IX86_BUILTIN_CVTTPD2DQ,
22251 IX86_BUILTIN_CVTTPD2PI,
22253 IX86_BUILTIN_CVTPI2PD,
22254 IX86_BUILTIN_CVTSI2SD,
22255 IX86_BUILTIN_CVTSI642SD,
22257 IX86_BUILTIN_CVTSD2SI,
22258 IX86_BUILTIN_CVTSD2SI64,
22259 IX86_BUILTIN_CVTSD2SS,
22260 IX86_BUILTIN_CVTSS2SD,
22261 IX86_BUILTIN_CVTTSD2SI,
22262 IX86_BUILTIN_CVTTSD2SI64,
22264 IX86_BUILTIN_CVTPS2DQ,
22265 IX86_BUILTIN_CVTPS2PD,
22266 IX86_BUILTIN_CVTTPS2DQ,
22268 IX86_BUILTIN_MOVNTI,
22269 IX86_BUILTIN_MOVNTPD,
22270 IX86_BUILTIN_MOVNTDQ,
22272 IX86_BUILTIN_MOVQ128,
22275 IX86_BUILTIN_MASKMOVDQU,
22276 IX86_BUILTIN_MOVMSKPD,
22277 IX86_BUILTIN_PMOVMSKB128,
22279 IX86_BUILTIN_PACKSSWB128,
22280 IX86_BUILTIN_PACKSSDW128,
22281 IX86_BUILTIN_PACKUSWB128,
22283 IX86_BUILTIN_PADDB128,
22284 IX86_BUILTIN_PADDW128,
22285 IX86_BUILTIN_PADDD128,
22286 IX86_BUILTIN_PADDQ128,
22287 IX86_BUILTIN_PADDSB128,
22288 IX86_BUILTIN_PADDSW128,
22289 IX86_BUILTIN_PADDUSB128,
22290 IX86_BUILTIN_PADDUSW128,
22291 IX86_BUILTIN_PSUBB128,
22292 IX86_BUILTIN_PSUBW128,
22293 IX86_BUILTIN_PSUBD128,
22294 IX86_BUILTIN_PSUBQ128,
22295 IX86_BUILTIN_PSUBSB128,
22296 IX86_BUILTIN_PSUBSW128,
22297 IX86_BUILTIN_PSUBUSB128,
22298 IX86_BUILTIN_PSUBUSW128,
22300 IX86_BUILTIN_PAND128,
22301 IX86_BUILTIN_PANDN128,
22302 IX86_BUILTIN_POR128,
22303 IX86_BUILTIN_PXOR128,
22305 IX86_BUILTIN_PAVGB128,
22306 IX86_BUILTIN_PAVGW128,
22308 IX86_BUILTIN_PCMPEQB128,
22309 IX86_BUILTIN_PCMPEQW128,
22310 IX86_BUILTIN_PCMPEQD128,
22311 IX86_BUILTIN_PCMPGTB128,
22312 IX86_BUILTIN_PCMPGTW128,
22313 IX86_BUILTIN_PCMPGTD128,
22315 IX86_BUILTIN_PMADDWD128,
22317 IX86_BUILTIN_PMAXSW128,
22318 IX86_BUILTIN_PMAXUB128,
22319 IX86_BUILTIN_PMINSW128,
22320 IX86_BUILTIN_PMINUB128,
22322 IX86_BUILTIN_PMULUDQ,
22323 IX86_BUILTIN_PMULUDQ128,
22324 IX86_BUILTIN_PMULHUW128,
22325 IX86_BUILTIN_PMULHW128,
22326 IX86_BUILTIN_PMULLW128,
22328 IX86_BUILTIN_PSADBW128,
22329 IX86_BUILTIN_PSHUFHW,
22330 IX86_BUILTIN_PSHUFLW,
22331 IX86_BUILTIN_PSHUFD,
22333 IX86_BUILTIN_PSLLDQI128,
22334 IX86_BUILTIN_PSLLWI128,
22335 IX86_BUILTIN_PSLLDI128,
22336 IX86_BUILTIN_PSLLQI128,
22337 IX86_BUILTIN_PSRAWI128,
22338 IX86_BUILTIN_PSRADI128,
22339 IX86_BUILTIN_PSRLDQI128,
22340 IX86_BUILTIN_PSRLWI128,
22341 IX86_BUILTIN_PSRLDI128,
22342 IX86_BUILTIN_PSRLQI128,
22344 IX86_BUILTIN_PSLLDQ128,
22345 IX86_BUILTIN_PSLLW128,
22346 IX86_BUILTIN_PSLLD128,
22347 IX86_BUILTIN_PSLLQ128,
22348 IX86_BUILTIN_PSRAW128,
22349 IX86_BUILTIN_PSRAD128,
22350 IX86_BUILTIN_PSRLW128,
22351 IX86_BUILTIN_PSRLD128,
22352 IX86_BUILTIN_PSRLQ128,
22354 IX86_BUILTIN_PUNPCKHBW128,
22355 IX86_BUILTIN_PUNPCKHWD128,
22356 IX86_BUILTIN_PUNPCKHDQ128,
22357 IX86_BUILTIN_PUNPCKHQDQ128,
22358 IX86_BUILTIN_PUNPCKLBW128,
22359 IX86_BUILTIN_PUNPCKLWD128,
22360 IX86_BUILTIN_PUNPCKLDQ128,
22361 IX86_BUILTIN_PUNPCKLQDQ128,
22363 IX86_BUILTIN_CLFLUSH,
22364 IX86_BUILTIN_MFENCE,
22365 IX86_BUILTIN_LFENCE,
22367 IX86_BUILTIN_BSRSI,
22368 IX86_BUILTIN_BSRDI,
22369 IX86_BUILTIN_RDPMC,
22370 IX86_BUILTIN_RDTSC,
22371 IX86_BUILTIN_RDTSCP,
22372 IX86_BUILTIN_ROLQI,
22373 IX86_BUILTIN_ROLHI,
22374 IX86_BUILTIN_RORQI,
22375 IX86_BUILTIN_RORHI,
22377 /* SSE3 */
22378 IX86_BUILTIN_ADDSUBPS,
22379 IX86_BUILTIN_HADDPS,
22380 IX86_BUILTIN_HSUBPS,
22381 IX86_BUILTIN_MOVSHDUP,
22382 IX86_BUILTIN_MOVSLDUP,
22383 IX86_BUILTIN_ADDSUBPD,
22384 IX86_BUILTIN_HADDPD,
22385 IX86_BUILTIN_HSUBPD,
22386 IX86_BUILTIN_LDDQU,
22388 IX86_BUILTIN_MONITOR,
22389 IX86_BUILTIN_MWAIT,
22391 /* SSSE3 */
22392 IX86_BUILTIN_PHADDW,
22393 IX86_BUILTIN_PHADDD,
22394 IX86_BUILTIN_PHADDSW,
22395 IX86_BUILTIN_PHSUBW,
22396 IX86_BUILTIN_PHSUBD,
22397 IX86_BUILTIN_PHSUBSW,
22398 IX86_BUILTIN_PMADDUBSW,
22399 IX86_BUILTIN_PMULHRSW,
22400 IX86_BUILTIN_PSHUFB,
22401 IX86_BUILTIN_PSIGNB,
22402 IX86_BUILTIN_PSIGNW,
22403 IX86_BUILTIN_PSIGND,
22404 IX86_BUILTIN_PALIGNR,
22405 IX86_BUILTIN_PABSB,
22406 IX86_BUILTIN_PABSW,
22407 IX86_BUILTIN_PABSD,
22409 IX86_BUILTIN_PHADDW128,
22410 IX86_BUILTIN_PHADDD128,
22411 IX86_BUILTIN_PHADDSW128,
22412 IX86_BUILTIN_PHSUBW128,
22413 IX86_BUILTIN_PHSUBD128,
22414 IX86_BUILTIN_PHSUBSW128,
22415 IX86_BUILTIN_PMADDUBSW128,
22416 IX86_BUILTIN_PMULHRSW128,
22417 IX86_BUILTIN_PSHUFB128,
22418 IX86_BUILTIN_PSIGNB128,
22419 IX86_BUILTIN_PSIGNW128,
22420 IX86_BUILTIN_PSIGND128,
22421 IX86_BUILTIN_PALIGNR128,
22422 IX86_BUILTIN_PABSB128,
22423 IX86_BUILTIN_PABSW128,
22424 IX86_BUILTIN_PABSD128,
22426 /* AMDFAM10 - SSE4A New Instructions. */
22427 IX86_BUILTIN_MOVNTSD,
22428 IX86_BUILTIN_MOVNTSS,
22429 IX86_BUILTIN_EXTRQI,
22430 IX86_BUILTIN_EXTRQ,
22431 IX86_BUILTIN_INSERTQI,
22432 IX86_BUILTIN_INSERTQ,
22434 /* SSE4.1 */
22435 IX86_BUILTIN_BLENDPD,
22436 IX86_BUILTIN_BLENDPS,
22437 IX86_BUILTIN_BLENDVPD,
22438 IX86_BUILTIN_BLENDVPS,
22439 IX86_BUILTIN_PBLENDVB128,
22440 IX86_BUILTIN_PBLENDW128,
22445 IX86_BUILTIN_INSERTPS128,
22447 IX86_BUILTIN_MOVNTDQA,
22448 IX86_BUILTIN_MPSADBW128,
22449 IX86_BUILTIN_PACKUSDW128,
22450 IX86_BUILTIN_PCMPEQQ,
22451 IX86_BUILTIN_PHMINPOSUW128,
22453 IX86_BUILTIN_PMAXSB128,
22454 IX86_BUILTIN_PMAXSD128,
22455 IX86_BUILTIN_PMAXUD128,
22456 IX86_BUILTIN_PMAXUW128,
22458 IX86_BUILTIN_PMINSB128,
22459 IX86_BUILTIN_PMINSD128,
22460 IX86_BUILTIN_PMINUD128,
22461 IX86_BUILTIN_PMINUW128,
22463 IX86_BUILTIN_PMOVSXBW128,
22464 IX86_BUILTIN_PMOVSXBD128,
22465 IX86_BUILTIN_PMOVSXBQ128,
22466 IX86_BUILTIN_PMOVSXWD128,
22467 IX86_BUILTIN_PMOVSXWQ128,
22468 IX86_BUILTIN_PMOVSXDQ128,
22470 IX86_BUILTIN_PMOVZXBW128,
22471 IX86_BUILTIN_PMOVZXBD128,
22472 IX86_BUILTIN_PMOVZXBQ128,
22473 IX86_BUILTIN_PMOVZXWD128,
22474 IX86_BUILTIN_PMOVZXWQ128,
22475 IX86_BUILTIN_PMOVZXDQ128,
22477 IX86_BUILTIN_PMULDQ128,
22478 IX86_BUILTIN_PMULLD128,
22480 IX86_BUILTIN_ROUNDPD,
22481 IX86_BUILTIN_ROUNDPS,
22482 IX86_BUILTIN_ROUNDSD,
22483 IX86_BUILTIN_ROUNDSS,
22485 IX86_BUILTIN_PTESTZ,
22486 IX86_BUILTIN_PTESTC,
22487 IX86_BUILTIN_PTESTNZC,
22489 IX86_BUILTIN_VEC_INIT_V2SI,
22490 IX86_BUILTIN_VEC_INIT_V4HI,
22491 IX86_BUILTIN_VEC_INIT_V8QI,
22492 IX86_BUILTIN_VEC_EXT_V2DF,
22493 IX86_BUILTIN_VEC_EXT_V2DI,
22494 IX86_BUILTIN_VEC_EXT_V4SF,
22495 IX86_BUILTIN_VEC_EXT_V4SI,
22496 IX86_BUILTIN_VEC_EXT_V8HI,
22497 IX86_BUILTIN_VEC_EXT_V2SI,
22498 IX86_BUILTIN_VEC_EXT_V4HI,
22499 IX86_BUILTIN_VEC_EXT_V16QI,
22500 IX86_BUILTIN_VEC_SET_V2DI,
22501 IX86_BUILTIN_VEC_SET_V4SF,
22502 IX86_BUILTIN_VEC_SET_V4SI,
22503 IX86_BUILTIN_VEC_SET_V8HI,
22504 IX86_BUILTIN_VEC_SET_V4HI,
22505 IX86_BUILTIN_VEC_SET_V16QI,
22507 IX86_BUILTIN_VEC_PACK_SFIX,
22509 /* SSE4.2 */
22510 IX86_BUILTIN_CRC32QI,
22511 IX86_BUILTIN_CRC32HI,
22512 IX86_BUILTIN_CRC32SI,
22513 IX86_BUILTIN_CRC32DI,
22515 IX86_BUILTIN_PCMPESTRI128,
22516 IX86_BUILTIN_PCMPESTRM128,
22517 IX86_BUILTIN_PCMPESTRA128,
22518 IX86_BUILTIN_PCMPESTRC128,
22519 IX86_BUILTIN_PCMPESTRO128,
22520 IX86_BUILTIN_PCMPESTRS128,
22521 IX86_BUILTIN_PCMPESTRZ128,
22522 IX86_BUILTIN_PCMPISTRI128,
22523 IX86_BUILTIN_PCMPISTRM128,
22524 IX86_BUILTIN_PCMPISTRA128,
22525 IX86_BUILTIN_PCMPISTRC128,
22526 IX86_BUILTIN_PCMPISTRO128,
22527 IX86_BUILTIN_PCMPISTRS128,
22528 IX86_BUILTIN_PCMPISTRZ128,
22530 IX86_BUILTIN_PCMPGTQ,
22532 /* AES instructions */
22533 IX86_BUILTIN_AESENC128,
22534 IX86_BUILTIN_AESENCLAST128,
22535 IX86_BUILTIN_AESDEC128,
22536 IX86_BUILTIN_AESDECLAST128,
22537 IX86_BUILTIN_AESIMC128,
22538 IX86_BUILTIN_AESKEYGENASSIST128,
22540 /* PCLMUL instruction */
22541 IX86_BUILTIN_PCLMULQDQ128,
22543 /* AVX */
22544 IX86_BUILTIN_ADDPD256,
22545 IX86_BUILTIN_ADDPS256,
22546 IX86_BUILTIN_ADDSUBPD256,
22547 IX86_BUILTIN_ADDSUBPS256,
22548 IX86_BUILTIN_ANDPD256,
22549 IX86_BUILTIN_ANDPS256,
22550 IX86_BUILTIN_ANDNPD256,
22551 IX86_BUILTIN_ANDNPS256,
22552 IX86_BUILTIN_BLENDPD256,
22553 IX86_BUILTIN_BLENDPS256,
22554 IX86_BUILTIN_BLENDVPD256,
22555 IX86_BUILTIN_BLENDVPS256,
22556 IX86_BUILTIN_DIVPD256,
22557 IX86_BUILTIN_DIVPS256,
22558 IX86_BUILTIN_DPPS256,
22559 IX86_BUILTIN_HADDPD256,
22560 IX86_BUILTIN_HADDPS256,
22561 IX86_BUILTIN_HSUBPD256,
22562 IX86_BUILTIN_HSUBPS256,
22563 IX86_BUILTIN_MAXPD256,
22564 IX86_BUILTIN_MAXPS256,
22565 IX86_BUILTIN_MINPD256,
22566 IX86_BUILTIN_MINPS256,
22567 IX86_BUILTIN_MULPD256,
22568 IX86_BUILTIN_MULPS256,
22569 IX86_BUILTIN_ORPD256,
22570 IX86_BUILTIN_ORPS256,
22571 IX86_BUILTIN_SHUFPD256,
22572 IX86_BUILTIN_SHUFPS256,
22573 IX86_BUILTIN_SUBPD256,
22574 IX86_BUILTIN_SUBPS256,
22575 IX86_BUILTIN_XORPD256,
22576 IX86_BUILTIN_XORPS256,
22577 IX86_BUILTIN_CMPSD,
22578 IX86_BUILTIN_CMPSS,
22579 IX86_BUILTIN_CMPPD,
22580 IX86_BUILTIN_CMPPS,
22581 IX86_BUILTIN_CMPPD256,
22582 IX86_BUILTIN_CMPPS256,
22583 IX86_BUILTIN_CVTDQ2PD256,
22584 IX86_BUILTIN_CVTDQ2PS256,
22585 IX86_BUILTIN_CVTPD2PS256,
22586 IX86_BUILTIN_CVTPS2DQ256,
22587 IX86_BUILTIN_CVTPS2PD256,
22588 IX86_BUILTIN_CVTTPD2DQ256,
22589 IX86_BUILTIN_CVTPD2DQ256,
22590 IX86_BUILTIN_CVTTPS2DQ256,
22591 IX86_BUILTIN_EXTRACTF128PD256,
22592 IX86_BUILTIN_EXTRACTF128PS256,
22593 IX86_BUILTIN_EXTRACTF128SI256,
22594 IX86_BUILTIN_VZEROALL,
22595 IX86_BUILTIN_VZEROUPPER,
22596 IX86_BUILTIN_VPERMILVARPD,
22597 IX86_BUILTIN_VPERMILVARPS,
22598 IX86_BUILTIN_VPERMILVARPD256,
22599 IX86_BUILTIN_VPERMILVARPS256,
22600 IX86_BUILTIN_VPERMILPD,
22601 IX86_BUILTIN_VPERMILPS,
22602 IX86_BUILTIN_VPERMILPD256,
22603 IX86_BUILTIN_VPERMILPS256,
22604 IX86_BUILTIN_VPERMIL2PD,
22605 IX86_BUILTIN_VPERMIL2PS,
22606 IX86_BUILTIN_VPERMIL2PD256,
22607 IX86_BUILTIN_VPERMIL2PS256,
22608 IX86_BUILTIN_VPERM2F128PD256,
22609 IX86_BUILTIN_VPERM2F128PS256,
22610 IX86_BUILTIN_VPERM2F128SI256,
22611 IX86_BUILTIN_VBROADCASTSS,
22612 IX86_BUILTIN_VBROADCASTSD256,
22613 IX86_BUILTIN_VBROADCASTSS256,
22614 IX86_BUILTIN_VBROADCASTPD256,
22615 IX86_BUILTIN_VBROADCASTPS256,
22616 IX86_BUILTIN_VINSERTF128PD256,
22617 IX86_BUILTIN_VINSERTF128PS256,
22618 IX86_BUILTIN_VINSERTF128SI256,
22619 IX86_BUILTIN_LOADUPD256,
22620 IX86_BUILTIN_LOADUPS256,
22621 IX86_BUILTIN_STOREUPD256,
22622 IX86_BUILTIN_STOREUPS256,
22623 IX86_BUILTIN_LDDQU256,
22624 IX86_BUILTIN_MOVNTDQ256,
22625 IX86_BUILTIN_MOVNTPD256,
22626 IX86_BUILTIN_MOVNTPS256,
22627 IX86_BUILTIN_LOADDQU256,
22628 IX86_BUILTIN_STOREDQU256,
22629 IX86_BUILTIN_MASKLOADPD,
22630 IX86_BUILTIN_MASKLOADPS,
22631 IX86_BUILTIN_MASKSTOREPD,
22632 IX86_BUILTIN_MASKSTOREPS,
22633 IX86_BUILTIN_MASKLOADPD256,
22634 IX86_BUILTIN_MASKLOADPS256,
22635 IX86_BUILTIN_MASKSTOREPD256,
22636 IX86_BUILTIN_MASKSTOREPS256,
22637 IX86_BUILTIN_MOVSHDUP256,
22638 IX86_BUILTIN_MOVSLDUP256,
22639 IX86_BUILTIN_MOVDDUP256,
22641 IX86_BUILTIN_SQRTPD256,
22642 IX86_BUILTIN_SQRTPS256,
22643 IX86_BUILTIN_SQRTPS_NR256,
22644 IX86_BUILTIN_RSQRTPS256,
22645 IX86_BUILTIN_RSQRTPS_NR256,
22647 IX86_BUILTIN_RCPPS256,
22649 IX86_BUILTIN_ROUNDPD256,
22650 IX86_BUILTIN_ROUNDPS256,
22652 IX86_BUILTIN_UNPCKHPD256,
22653 IX86_BUILTIN_UNPCKLPD256,
22654 IX86_BUILTIN_UNPCKHPS256,
22655 IX86_BUILTIN_UNPCKLPS256,
22657 IX86_BUILTIN_SI256_SI,
22658 IX86_BUILTIN_PS256_PS,
22659 IX86_BUILTIN_PD256_PD,
22660 IX86_BUILTIN_SI_SI256,
22661 IX86_BUILTIN_PS_PS256,
22662 IX86_BUILTIN_PD_PD256,
22664 IX86_BUILTIN_VTESTZPD,
22665 IX86_BUILTIN_VTESTCPD,
22666 IX86_BUILTIN_VTESTNZCPD,
22667 IX86_BUILTIN_VTESTZPS,
22668 IX86_BUILTIN_VTESTCPS,
22669 IX86_BUILTIN_VTESTNZCPS,
22670 IX86_BUILTIN_VTESTZPD256,
22671 IX86_BUILTIN_VTESTCPD256,
22672 IX86_BUILTIN_VTESTNZCPD256,
22673 IX86_BUILTIN_VTESTZPS256,
22674 IX86_BUILTIN_VTESTCPS256,
22675 IX86_BUILTIN_VTESTNZCPS256,
22676 IX86_BUILTIN_PTESTZ256,
22677 IX86_BUILTIN_PTESTC256,
22678 IX86_BUILTIN_PTESTNZC256,
22680 IX86_BUILTIN_MOVMSKPD256,
22681 IX86_BUILTIN_MOVMSKPS256,
22683 /* TFmode support builtins. */
22685 IX86_BUILTIN_HUGE_VALQ,
22686 IX86_BUILTIN_FABSQ,
22687 IX86_BUILTIN_COPYSIGNQ,
22689 /* Vectorizer support builtins. */
22690 IX86_BUILTIN_CPYSGNPS,
22691 IX86_BUILTIN_CPYSGNPD,
22693 IX86_BUILTIN_CVTUDQ2PS,
22695 IX86_BUILTIN_VEC_PERM_V2DF,
22696 IX86_BUILTIN_VEC_PERM_V4SF,
22697 IX86_BUILTIN_VEC_PERM_V2DI,
22698 IX86_BUILTIN_VEC_PERM_V4SI,
22699 IX86_BUILTIN_VEC_PERM_V8HI,
22700 IX86_BUILTIN_VEC_PERM_V16QI,
22701 IX86_BUILTIN_VEC_PERM_V2DI_U,
22702 IX86_BUILTIN_VEC_PERM_V4SI_U,
22703 IX86_BUILTIN_VEC_PERM_V8HI_U,
22704 IX86_BUILTIN_VEC_PERM_V16QI_U,
22705 IX86_BUILTIN_VEC_PERM_V4DF,
22706 IX86_BUILTIN_VEC_PERM_V8SF,
22708 /* FMA4 and XOP instructions. */
22709 IX86_BUILTIN_VFMADDSS,
22710 IX86_BUILTIN_VFMADDSD,
22711 IX86_BUILTIN_VFMADDPS,
22712 IX86_BUILTIN_VFMADDPD,
22713 IX86_BUILTIN_VFMSUBSS,
22714 IX86_BUILTIN_VFMSUBSD,
22715 IX86_BUILTIN_VFMSUBPS,
22716 IX86_BUILTIN_VFMSUBPD,
22717 IX86_BUILTIN_VFMADDSUBPS,
22718 IX86_BUILTIN_VFMADDSUBPD,
22719 IX86_BUILTIN_VFMSUBADDPS,
22720 IX86_BUILTIN_VFMSUBADDPD,
22721 IX86_BUILTIN_VFNMADDSS,
22722 IX86_BUILTIN_VFNMADDSD,
22723 IX86_BUILTIN_VFNMADDPS,
22724 IX86_BUILTIN_VFNMADDPD,
22725 IX86_BUILTIN_VFNMSUBSS,
22726 IX86_BUILTIN_VFNMSUBSD,
22727 IX86_BUILTIN_VFNMSUBPS,
22728 IX86_BUILTIN_VFNMSUBPD,
22729 IX86_BUILTIN_VFMADDPS256,
22730 IX86_BUILTIN_VFMADDPD256,
22731 IX86_BUILTIN_VFMSUBPS256,
22732 IX86_BUILTIN_VFMSUBPD256,
22733 IX86_BUILTIN_VFMADDSUBPS256,
22734 IX86_BUILTIN_VFMADDSUBPD256,
22735 IX86_BUILTIN_VFMSUBADDPS256,
22736 IX86_BUILTIN_VFMSUBADDPD256,
22737 IX86_BUILTIN_VFNMADDPS256,
22738 IX86_BUILTIN_VFNMADDPD256,
22739 IX86_BUILTIN_VFNMSUBPS256,
22740 IX86_BUILTIN_VFNMSUBPD256,
22742 IX86_BUILTIN_VPCMOV,
22743 IX86_BUILTIN_VPCMOV_V2DI,
22744 IX86_BUILTIN_VPCMOV_V4SI,
22745 IX86_BUILTIN_VPCMOV_V8HI,
22746 IX86_BUILTIN_VPCMOV_V16QI,
22747 IX86_BUILTIN_VPCMOV_V4SF,
22748 IX86_BUILTIN_VPCMOV_V2DF,
22749 IX86_BUILTIN_VPCMOV256,
22750 IX86_BUILTIN_VPCMOV_V4DI256,
22751 IX86_BUILTIN_VPCMOV_V8SI256,
22752 IX86_BUILTIN_VPCMOV_V16HI256,
22753 IX86_BUILTIN_VPCMOV_V32QI256,
22754 IX86_BUILTIN_VPCMOV_V8SF256,
22755 IX86_BUILTIN_VPCMOV_V4DF256,
22757 IX86_BUILTIN_VPPERM,
22759 IX86_BUILTIN_VPMACSSWW,
22760 IX86_BUILTIN_VPMACSWW,
22761 IX86_BUILTIN_VPMACSSWD,
22762 IX86_BUILTIN_VPMACSWD,
22763 IX86_BUILTIN_VPMACSSDD,
22764 IX86_BUILTIN_VPMACSDD,
22765 IX86_BUILTIN_VPMACSSDQL,
22766 IX86_BUILTIN_VPMACSSDQH,
22767 IX86_BUILTIN_VPMACSDQL,
22768 IX86_BUILTIN_VPMACSDQH,
22769 IX86_BUILTIN_VPMADCSSWD,
22770 IX86_BUILTIN_VPMADCSWD,
22772 IX86_BUILTIN_VPHADDBW,
22773 IX86_BUILTIN_VPHADDBD,
22774 IX86_BUILTIN_VPHADDBQ,
22775 IX86_BUILTIN_VPHADDWD,
22776 IX86_BUILTIN_VPHADDWQ,
22777 IX86_BUILTIN_VPHADDDQ,
22778 IX86_BUILTIN_VPHADDUBW,
22779 IX86_BUILTIN_VPHADDUBD,
22780 IX86_BUILTIN_VPHADDUBQ,
22781 IX86_BUILTIN_VPHADDUWD,
22782 IX86_BUILTIN_VPHADDUWQ,
22783 IX86_BUILTIN_VPHADDUDQ,
22784 IX86_BUILTIN_VPHSUBBW,
22785 IX86_BUILTIN_VPHSUBWD,
22786 IX86_BUILTIN_VPHSUBDQ,
22788 IX86_BUILTIN_VPROTB,
22789 IX86_BUILTIN_VPROTW,
22790 IX86_BUILTIN_VPROTD,
22791 IX86_BUILTIN_VPROTQ,
22792 IX86_BUILTIN_VPROTB_IMM,
22793 IX86_BUILTIN_VPROTW_IMM,
22794 IX86_BUILTIN_VPROTD_IMM,
22795 IX86_BUILTIN_VPROTQ_IMM,
22797 IX86_BUILTIN_VPSHLB,
22798 IX86_BUILTIN_VPSHLW,
22799 IX86_BUILTIN_VPSHLD,
22800 IX86_BUILTIN_VPSHLQ,
22801 IX86_BUILTIN_VPSHAB,
22802 IX86_BUILTIN_VPSHAW,
22803 IX86_BUILTIN_VPSHAD,
22804 IX86_BUILTIN_VPSHAQ,
22806 IX86_BUILTIN_VFRCZSS,
22807 IX86_BUILTIN_VFRCZSD,
22808 IX86_BUILTIN_VFRCZPS,
22809 IX86_BUILTIN_VFRCZPD,
22810 IX86_BUILTIN_VFRCZPS256,
22811 IX86_BUILTIN_VFRCZPD256,
22813 IX86_BUILTIN_VPCOMEQUB,
22814 IX86_BUILTIN_VPCOMNEUB,
22815 IX86_BUILTIN_VPCOMLTUB,
22816 IX86_BUILTIN_VPCOMLEUB,
22817 IX86_BUILTIN_VPCOMGTUB,
22818 IX86_BUILTIN_VPCOMGEUB,
22819 IX86_BUILTIN_VPCOMFALSEUB,
22820 IX86_BUILTIN_VPCOMTRUEUB,
22822 IX86_BUILTIN_VPCOMEQUW,
22823 IX86_BUILTIN_VPCOMNEUW,
22824 IX86_BUILTIN_VPCOMLTUW,
22825 IX86_BUILTIN_VPCOMLEUW,
22826 IX86_BUILTIN_VPCOMGTUW,
22827 IX86_BUILTIN_VPCOMGEUW,
22828 IX86_BUILTIN_VPCOMFALSEUW,
22829 IX86_BUILTIN_VPCOMTRUEUW,
22831 IX86_BUILTIN_VPCOMEQUD,
22832 IX86_BUILTIN_VPCOMNEUD,
22833 IX86_BUILTIN_VPCOMLTUD,
22834 IX86_BUILTIN_VPCOMLEUD,
22835 IX86_BUILTIN_VPCOMGTUD,
22836 IX86_BUILTIN_VPCOMGEUD,
22837 IX86_BUILTIN_VPCOMFALSEUD,
22838 IX86_BUILTIN_VPCOMTRUEUD,
22840 IX86_BUILTIN_VPCOMEQUQ,
22841 IX86_BUILTIN_VPCOMNEUQ,
22842 IX86_BUILTIN_VPCOMLTUQ,
22843 IX86_BUILTIN_VPCOMLEUQ,
22844 IX86_BUILTIN_VPCOMGTUQ,
22845 IX86_BUILTIN_VPCOMGEUQ,
22846 IX86_BUILTIN_VPCOMFALSEUQ,
22847 IX86_BUILTIN_VPCOMTRUEUQ,
22849 IX86_BUILTIN_VPCOMEQB,
22850 IX86_BUILTIN_VPCOMNEB,
22851 IX86_BUILTIN_VPCOMLTB,
22852 IX86_BUILTIN_VPCOMLEB,
22853 IX86_BUILTIN_VPCOMGTB,
22854 IX86_BUILTIN_VPCOMGEB,
22855 IX86_BUILTIN_VPCOMFALSEB,
22856 IX86_BUILTIN_VPCOMTRUEB,
22858 IX86_BUILTIN_VPCOMEQW,
22859 IX86_BUILTIN_VPCOMNEW,
22860 IX86_BUILTIN_VPCOMLTW,
22861 IX86_BUILTIN_VPCOMLEW,
22862 IX86_BUILTIN_VPCOMGTW,
22863 IX86_BUILTIN_VPCOMGEW,
22864 IX86_BUILTIN_VPCOMFALSEW,
22865 IX86_BUILTIN_VPCOMTRUEW,
22867 IX86_BUILTIN_VPCOMEQD,
22868 IX86_BUILTIN_VPCOMNED,
22869 IX86_BUILTIN_VPCOMLTD,
22870 IX86_BUILTIN_VPCOMLED,
22871 IX86_BUILTIN_VPCOMGTD,
22872 IX86_BUILTIN_VPCOMGED,
22873 IX86_BUILTIN_VPCOMFALSED,
22874 IX86_BUILTIN_VPCOMTRUED,
22876 IX86_BUILTIN_VPCOMEQQ,
22877 IX86_BUILTIN_VPCOMNEQ,
22878 IX86_BUILTIN_VPCOMLTQ,
22879 IX86_BUILTIN_VPCOMLEQ,
22880 IX86_BUILTIN_VPCOMGTQ,
22881 IX86_BUILTIN_VPCOMGEQ,
22882 IX86_BUILTIN_VPCOMFALSEQ,
22883 IX86_BUILTIN_VPCOMTRUEQ,
22885 /* LWP instructions. */
22886 IX86_BUILTIN_LLWPCB,
22887 IX86_BUILTIN_SLWPCB,
22888 IX86_BUILTIN_LWPVAL32,
22889 IX86_BUILTIN_LWPVAL64,
22890 IX86_BUILTIN_LWPINS32,
22891 IX86_BUILTIN_LWPINS64,
22895 /* FSGSBASE instructions. */
22896 IX86_BUILTIN_RDFSBASE32,
22897 IX86_BUILTIN_RDFSBASE64,
22898 IX86_BUILTIN_RDGSBASE32,
22899 IX86_BUILTIN_RDGSBASE64,
22900 IX86_BUILTIN_WRFSBASE32,
22901 IX86_BUILTIN_WRFSBASE64,
22902 IX86_BUILTIN_WRGSBASE32,
22903 IX86_BUILTIN_WRGSBASE64,
22905 /* RDRND instructions. */
22906 IX86_BUILTIN_RDRAND16,
22907 IX86_BUILTIN_RDRAND32,
22908 IX86_BUILTIN_RDRAND64,
22910 /* F16C instructions. */
22911 IX86_BUILTIN_CVTPH2PS,
22912 IX86_BUILTIN_CVTPH2PS256,
22913 IX86_BUILTIN_CVTPS2PH,
22914 IX86_BUILTIN_CVTPS2PH256,
22916 IX86_BUILTIN_MAX
22917 };
22919 /* Table for the ix86 builtin decls. */
22920 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22922 /* Table of all of the builtin functions that are possible with different ISAs
22923 but are waiting to be built until a function is declared to use that
22924 ISA. */
22925 struct builtin_isa {
22926 const char *name; /* function name */
22927 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22928 int isa; /* isa_flags this builtin is defined for */
22929 bool const_p; /* true if the declaration is constant */
22930 bool set_and_not_built_p; /* true while the decl is deferred, waiting for its ISA to be enabled */
22931 };
22933 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
22936 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
22937 of which isa_flags to use in the ix86_builtins_isa array. Stores the
22938 function decl in the ix86_builtins array. Returns the function decl or
22939 NULL_TREE, if the builtin was not added.
22941 If the front end has a special hook for builtin functions, delay adding
22942 builtin functions that aren't in the current ISA until the ISA is changed
22943 with function specific optimization. Doing so can save about 300K for the
22944 default compiler. When the builtin is expanded, check at that time whether
22945 it is valid.
22947 If the front end doesn't have a special hook, record all builtins, even if
22948 they aren't in the current ISA, in case the user uses function specific
22949 options for a different ISA, so that we don't get scope errors if a
22950 builtin is added in the middle of a function scope. */
22952 static inline tree
22953 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22954 enum ix86_builtins code)
22955 {
22956 tree decl = NULL_TREE;
22958 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22959 {
22960 ix86_builtins_isa[(int) code].isa = mask;
22962 mask &= ~OPTION_MASK_ISA_64BIT;
22963 if (mask == 0
22964 || (mask & ix86_isa_flags) != 0
22965 || (lang_hooks.builtin_function
22966 == lang_hooks.builtin_function_ext_scope))
22968 {
22969 tree type = ix86_get_builtin_func_type (tcode);
22970 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22971 NULL, NULL_TREE);
22972 ix86_builtins[(int) code] = decl;
22973 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
22974 }
22975 else
22976 {
22977 ix86_builtins[(int) code] = NULL_TREE;
22978 ix86_builtins_isa[(int) code].tcode = tcode;
22979 ix86_builtins_isa[(int) code].name = name;
22980 ix86_builtins_isa[(int) code].const_p = false;
22981 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
22982 }
22983 }
22985 return decl;
22986 }
22988 /* Like def_builtin, but also marks the function decl "const". */
22990 static inline tree
22991 def_builtin_const (int mask, const char *name,
22992 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
22993 {
22994 tree decl = def_builtin (mask, name, tcode, code);
22995 if (decl)
22996 TREE_READONLY (decl) = 1;
22997 else
22998 ix86_builtins_isa[(int) code].const_p = true;
23000 return decl;
23001 }
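/* Editor's note: for illustration only (the real calls are made by the
   builtin-init routines later in this file, outside this excerpt), a
   row of bdesc_args below such as the __builtin_ia32_paddb entry is
   registered roughly like this:  */
#if 0
  tree paddb = def_builtin_const (OPTION_MASK_ISA_MMX,
                                  "__builtin_ia32_paddb",
                                  V8QI_FTYPE_V8QI_V8QI,
                                  IX86_BUILTIN_PADDB);
  /* With mask == 0 the builtin is defined immediately; with an ISA bit
     set while that ISA is disabled, def_builtin only fills in the
     builtin_isa entry and leaves the decl for ix86_add_new_builtins.  */
#endif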
23003 /* Add any new builtin functions for a given ISA that may not have been
23004 declared. This saves a bit of space compared to adding all of the
23005 declarations to the tree, even if we didn't use them. */
23007 static void
23008 ix86_add_new_builtins (int isa)
23009 {
23010 int i;
23012 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
23013 {
23014 if ((ix86_builtins_isa[i].isa & isa) != 0
23015 && ix86_builtins_isa[i].set_and_not_built_p)
23016 {
23017 tree decl, type;
23019 /* Don't define the builtin again. */
23020 ix86_builtins_isa[i].set_and_not_built_p = false;
23022 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
23023 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
23024 type, i, BUILT_IN_MD, NULL,
23025 NULL_TREE);
23027 ix86_builtins[i] = decl;
23028 if (ix86_builtins_isa[i].const_p)
23029 TREE_READONLY (decl) = 1;
23030 }
23031 }
23032 }
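/* Editor's note: a standalone sketch (not GCC code, disabled with
   #if 0) of the deferral protocol shared by def_builtin and the
   function above, with invented demo_* names: each pending entry
   records the ISA bits it needs and is materialized at most once,
   the first time an enabled ISA overlaps those bits.  */
#if 0
struct demo_pending
{
  unsigned isa;         /* bits required; cf. builtin_isa.isa */
  int pending;          /* cf. set_and_not_built_p */
};

static void
demo_add_new (struct demo_pending *tab, unsigned n, unsigned enabled)
{
  unsigned i;

  for (i = 0; i < n; i++)
    if ((tab[i].isa & enabled) != 0 && tab[i].pending)
      {
        tab[i].pending = 0;     /* Don't define the builtin again.  */
        /* ... build the decl here, as the loop above does with
           add_builtin_function_ext_scope ...  */
      }
}
#endif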
23034 /* Bits for builtin_description.flag. */
23036 /* Set when we don't support the comparison natively, and should
23037 swap the comparison operands in order to support it. */
23038 #define BUILTIN_DESC_SWAP_OPERANDS 1
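/* Editor's note: a simplified, hedged sketch of how a swap flag like
   this is consumed (the SSE compare expanders that actually test it
   live later in this file, outside this excerpt).  A comparison such
   as a > b with no native pattern is emitted as b < a by exchanging
   the already-expanded operand rtxes op0 and op1:  */
#if 0
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }
#endif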
23040 struct builtin_description
23041 {
23042 const unsigned int mask;
23043 const enum insn_code icode;
23044 const char *const name;
23045 const enum ix86_builtins code;
23046 const enum rtx_code comparison;
23047 const int flag;
23048 };
23050 static const struct builtin_description bdesc_comi[] =
23051 {
23052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
23053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
23054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
23055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
23056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
23057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
23058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
23059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
23060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
23061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
23062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
23063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
23064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
23065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
23066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
23067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
23068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
23069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
23070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
23071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
23072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
23073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
23074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
23075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
23076 };
23078 static const struct builtin_description bdesc_pcmpestr[] =
23079 {
23080 /* SSE4.2 */
23081 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
23082 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
23083 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
23084 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
23085 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
23086 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
23087 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
23088 };
23090 static const struct builtin_description bdesc_pcmpistr[] =
23091 {
23092 /* SSE4.2 */
23093 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
23094 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
23095 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
23096 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
23097 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
23098 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
23099 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
23100 };
23102 /* Special builtins with a variable number of arguments. */
23103 static const struct builtin_description bdesc_special_args[] =
23104 {
23105 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
23106 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
23109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23112 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23124 /* SSE or 3DNow!A */
23125 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23126 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
23129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
23133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
23135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
23136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
23137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23143 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23146 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
23149 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23150 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23153 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
23154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
23156 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23157 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23158 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23159 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
23160 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
23162 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23163 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23164 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23165 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23166 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23167 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
23168 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23170 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
23171 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23172 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23174 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
23175 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
23176 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
23177 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
23178 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
23179 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
23180 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
23181 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
23183 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
23184 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
23185 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
23186 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
23187 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
23188 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
23191 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23192 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23193 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23194 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23195 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23196 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23197 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23198 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23201 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
23202 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23203 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23204 };
23206 /* Builtins with a variable number of arguments. */
23207 static const struct builtin_description bdesc_args[] =
23208 {
23209 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23210 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23211 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23212 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23213 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23214 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23215 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
23254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
23256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
23258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
23262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23282 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23283 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23284 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23285 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23287 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23288 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23289 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23290 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23291 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23292 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23293 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23294 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23295 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23296 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23297 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23298 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23299 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23300 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23301 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23304 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23305 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23306 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23307 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23308 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23309 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
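
  /* NOTE (editorial gloss): the rtx-code column picks the condition for
     the maskcmp pattern, and a _SWAP type suffix reverses the operands
     at expansion time.  There is no direct greater-than pattern, so
     e.g. __builtin_ia32_cmpgtps (a, b) is emitted as the LT compare
     with swapped operands (b < a); the UNGE/UNGT codes implement the
     "not less than" / "not less or equal" forms.  */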

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
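
  /* NOTE (editorial gloss): rows with CODE_FOR_nothing have no single
     insn behind them; the vec_perm builtins above are special-cased
     during expansion (ix86_expand_builtin dispatches them to the
     variable-permutation expander), so only the name, builtin code and
     prototype from this table are used.  */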

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
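
  /* NOTE (editorial gloss): in the shift rows above, the _SI_COUNT
     types pass the count as a scalar (immediate or register), while
     the _V*_COUNT types take the count from a vector operand, matching
     the two hardware forms of psllw/pslld/psllq etc.  The _INT_CONVERT
     whole-register shifts (pslldqi128/psrldqi128) count in bits on the
     V1TImode pattern; the emmintrin.h wrappers scale their byte count
     by 8 before calling the builtin.  */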

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  /* SSSE3.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  /* SSE4.1 */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
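
  /* NOTE (editorial gloss): the three ptest rows share one insn; the
     rtx-code column selects which flag the builtin tests after PTEST
     sets ZF and CF: EQ reads ZF (ptestz), LTU reads CF (ptestc), and
     GTU tests that neither is set (ptestnzc).  The vtest rows further
     down use the same convention.  */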

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
23827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
23829 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
23832 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
23833 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
23834 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
23835 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
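/* Illustrative sketch (not part of this file): the four F16C builtins above
   back the half-precision conversion intrinsics.  A user-level round trip
   through 16-bit floats might look like this; assumes <immintrin.h> and
   compilation with -mf16c.  */
#if 0
#include <immintrin.h>

static float
f16c_roundtrip (float f)
{
  unsigned short h = _cvtss_sh (f, 0);	/* vcvtps2ph: float -> half */
  return _cvtsh_ss (h);			/* vcvtph2ps: half -> float */
}
#endif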
23838 /* FMA4 and XOP. */
23839 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
23840 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
23841 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
23842 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
23843 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
23844 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
23845 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
23846 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
23847 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
23848 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
23849 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
23850 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
23851 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
23852 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
23853 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
23854 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
23855 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
23856 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
23857 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
23858 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
23859 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
23860 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
23861 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
23862 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
23863 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
23864 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
23865 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
23866 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
23867 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
23868 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
23869 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
23870 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
23871 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
23872 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
23873 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
23874 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
23875 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
23876 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
23877 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
23878 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
23879 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
23880 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
23881 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
23882 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
23883 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
23884 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
23885 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
23886 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
23887 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
23888 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
23889 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
23890 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
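/* Reading guide (illustrative sketch, not part of this file): each
   MULTI_ARG_* macro abbreviates an ix86_builtin_func_type.  For example
   MULTI_ARG_3_SF is V4SF_FTYPE_V4SF_V4SF_V4SF: three V4SF operands in, one
   out.  User code can reach such an entry directly, e.g. with FMA4 enabled
   (-mfma4):  */
#if 0
#include <x86intrin.h>

static __m128
fma4_madd (__m128 a, __m128 b, __m128 c)
{
  /* __builtin_ia32_vfmaddps is registered below with MULTI_ARG_3_SF.  */
  return __builtin_ia32_vfmaddps (a, b, c);	/* a * b + c, element-wise */
}
#endif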
23892 static const struct builtin_description bdesc_multi_arg[] =
23893 {
23894 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23895 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23896 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23897 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23898 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23899 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23900 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23901 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23903 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23904 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23905 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23906 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23907 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23908 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23909 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23910 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23912 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23913 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23914 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23915 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23917 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23918 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23919 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23920 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23922 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23923 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23924 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23925 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23927 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23928 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23929 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23930 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23932 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
23933 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
23934 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
23935 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
23936 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
23937 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
23938 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
23940 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23941 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23942 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
23943 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
23944 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
23945 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23946 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23948 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
23950 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23951 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23952 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23953 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23954 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23955 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23959 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
23965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
23966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
23967 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
23968 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
23969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
23970 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
23971 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23972 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
23973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
23974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
23975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
23977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
23978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23982 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23984 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23990 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23992 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
24004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
24007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
24008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
24009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
24011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
24012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
24015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
24016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
24017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
24019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
24020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
24023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
24024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
24025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
24027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
24031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
24032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
24033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
24035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
24036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
24039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
24040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
24041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
24043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
24044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
24047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
24048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
24049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
24051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
24052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
24055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
24056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
24057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
24059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
24063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
24064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
24065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
24067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
24086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
24087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
24088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
24089 };
24092 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
24093 in the current target ISA to allow the user to compile particular modules
24094 with different target specific options that differ from the command line
24095 options.  */
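/* Illustrative sketch of what this policy enables (user code, not part of
   this file): because every builtin is registered up front, one translation
   unit can select ISA levels per function via the target attribute, even
   when the command line lacks the corresponding -m flag.  */
#if 0
__attribute__((target ("avx")))
static void
avx_path (float *dst, const float *src)
{
  /* AVX builtins and intrinsics are usable here.  */
}
#endif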
24096 static void
24097 ix86_init_mmx_sse_builtins (void)
24098 {
24099 const struct builtin_description * d;
24100 enum ix86_builtin_func_type ftype;
24101 size_t i;
24103 /* Add all special builtins with variable number of operands. */
24104 for (i = 0, d = bdesc_special_args;
24105 i < ARRAY_SIZE (bdesc_special_args);
24106 i++, d++)
24107 {
24108 if (d->name == 0)
24109 continue;
24111 ftype = (enum ix86_builtin_func_type) d->flag;
24112 def_builtin (d->mask, d->name, ftype, d->code);
24113 }
24115 /* Add all builtins with variable number of operands. */
24116 for (i = 0, d = bdesc_args;
24117 i < ARRAY_SIZE (bdesc_args);
24118 i++, d++)
24119 {
24120 if (d->name == 0)
24121 continue;
24123 ftype = (enum ix86_builtin_func_type) d->flag;
24124 def_builtin_const (d->mask, d->name, ftype, d->code);
24125 }
24127 /* pcmpestr[im] insns. */
24128 for (i = 0, d = bdesc_pcmpestr;
24129 i < ARRAY_SIZE (bdesc_pcmpestr);
24130 i++, d++)
24131 {
24132 if (d->code == IX86_BUILTIN_PCMPESTRM128)
24133 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
24134 else
24135 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
24136 def_builtin_const (d->mask, d->name, ftype, d->code);
24137 }
24139 /* pcmpistr[im] insns. */
24140 for (i = 0, d = bdesc_pcmpistr;
24141 i < ARRAY_SIZE (bdesc_pcmpistr);
24142 i++, d++)
24143 {
24144 if (d->code == IX86_BUILTIN_PCMPISTRM128)
24145 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
24146 else
24147 ftype = INT_FTYPE_V16QI_V16QI_INT;
24148 def_builtin_const (d->mask, d->name, ftype, d->code);
24149 }
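/* Illustrative sketch: the pcmpistri form yields an index
   (INT_FTYPE_V16QI_V16QI_INT) and the pcmpistrm form a mask
   (V16QI_FTYPE_V16QI_V16QI_INT).  Typical SSE4.2 user code; assumes
   <smmintrin.h> and -msse4.2.  */
#if 0
#include <smmintrin.h>

static int
first_equal_byte (__m128i a, __m128i b)
{
  /* Index of the first byte-wise match; 16 when there is none.  */
  return _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
}
#endif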
24151 /* comi/ucomi insns. */
24152 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24153 {
24154 if (d->mask == OPTION_MASK_ISA_SSE2)
24155 ftype = INT_FTYPE_V2DF_V2DF;
24156 else
24157 ftype = INT_FTYPE_V4SF_V4SF;
24158 def_builtin_const (d->mask, d->name, ftype, d->code);
24159 }
24161 /* SSE */
24162 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
24163 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
24164 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
24165 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
24167 /* SSE or 3DNow!A */
24168 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24169 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
24170 IX86_BUILTIN_MASKMOVQ);
24172 /* SSE2 */
24173 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
24174 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
24176 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
24177 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
24178 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
24179 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
24181 /* SSE3 */
24182 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
24183 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
24184 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
24185 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
24187 /* AES */
24188 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
24189 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
24190 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
24191 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
24192 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
24193 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
24194 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
24195 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
24196 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
24197 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
24198 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
24199 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
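/* Illustrative sketch: each AES builtin above performs one round step on a
   128-bit state; user code normally goes through <wmmintrin.h> (-maes).  */
#if 0
#include <wmmintrin.h>

static __m128i
aes_round (__m128i state, __m128i round_key)
{
  return _mm_aesenc_si128 (state, round_key);	/* __builtin_ia32_aesenc128 */
}
#endif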
24201 /* PCLMUL */
24202 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
24203 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
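/* Illustrative sketch: carry-less multiplication of two quadword lanes,
   selected by the immediate; via <wmmintrin.h> (-mpclmul).  */
#if 0
#include <wmmintrin.h>

static __m128i
clmul_low (__m128i a, __m128i b)
{
  /* 0x00 selects the low 64-bit lane of each operand.  */
  return _mm_clmulepi64_si128 (a, b, 0x00);	/* __builtin_ia32_pclmulqdq128 */
}
#endif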
24205 /* MMX access to the vec_init patterns. */
24206 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
24207 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
24209 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
24210 V4HI_FTYPE_HI_HI_HI_HI,
24211 IX86_BUILTIN_VEC_INIT_V4HI);
24213 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
24214 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
24215 IX86_BUILTIN_VEC_INIT_V8QI);
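/* Illustrative sketch: the vec_init builtins assemble an MMX vector from
   scalars; this is what backs constructs such as _mm_set_pi32 in
   mmintrin.h (assumes -mmmx).  */
#if 0
#include <mmintrin.h>

static __m64
make_pair (int hi, int lo)
{
  /* V2SI_FTYPE_INT_INT: low element first, as _mm_set_pi32 does.  */
  return (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
}
#endif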
24217 /* Access to the vec_extract patterns. */
24218 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
24219 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
24220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
24221 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
24222 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
24223 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
24224 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
24225 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
24226 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
24227 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
24229 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24230 "__builtin_ia32_vec_ext_v4hi",
24231 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
24233 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
24234 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
24236 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
24237 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
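/* Illustrative sketch: the vec_ext builtins read one lane of a vector with
   a compile-time constant index, e.g. a float lane of a V4SF (assumes
   <xmmintrin.h> and -msse).  */
#if 0
#include <xmmintrin.h>

static float
third_lane (__m128 v)
{
  /* FLOAT_FTYPE_V4SF_INT; the index must be an immediate.  */
  return __builtin_ia32_vec_ext_v4sf ((__v4sf) v, 2);
}
#endif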
24239 /* Access to the vec_set patterns. */
24240 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
24241 "__builtin_ia32_vec_set_v2di",
24242 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
24244 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
24245 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
24247 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
24248 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
24250 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
24251 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
24253 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24254 "__builtin_ia32_vec_set_v4hi",
24255 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
24257 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
24258 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
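/* Illustrative sketch: the vec_set builtins replace a single lane and keep
   the rest, e.g. what backs _mm_insert_epi16 (V8HI_FTYPE_V8HI_HI_INT);
   assumes <emmintrin.h> and -msse2.  */
#if 0
#include <emmintrin.h>

static __m128i
set_lane3 (__m128i v, short s)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) v, s, 3);
}
#endif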
24260 /* Add the FMA4 and XOP multi-argument builtin instructions.  */
24261 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24262 {
24263 if (d->name == 0)
24264 continue;
24266 ftype = (enum ix86_builtin_func_type) d->flag;
24267 def_builtin_const (d->mask, d->name, ftype, d->code);
24268 }
24269 }
24271 /* Internal method for ix86_init_builtins.  */
24273 static void
24274 ix86_init_builtins_va_builtins_abi (void)
24275 {
24276 tree ms_va_ref, sysv_va_ref;
24277 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24278 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24279 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24280 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
24282 if (!TARGET_64BIT)
24283 return;
24284 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24285 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24286 ms_va_ref = build_reference_type (ms_va_list_type_node);
24287 sysv_va_ref =
24288 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
24290 fnvoid_va_end_ms =
24291 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24292 fnvoid_va_start_ms =
24293 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24294 fnvoid_va_end_sysv =
24295 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24296 fnvoid_va_start_sysv =
24297 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24298 NULL_TREE);
24299 fnvoid_va_copy_ms =
24300 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24301 NULL_TREE);
24302 fnvoid_va_copy_sysv =
24303 build_function_type_list (void_type_node, sysv_va_ref,
24304 sysv_va_ref, NULL_TREE);
24306 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24307 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24308 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24309 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24310 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24311 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24312 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24313 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24314 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24315 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24316 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24317 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24318 }
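/* Illustrative sketch of the user-level view of the builtins registered
   above (x86-64 only): an explicitly ms_abi varargs function walks its
   arguments with __builtin_ms_va_list and the __builtin_ms_va_* trio.  */
#if 0
__attribute__((ms_abi))
static int
sum_ints (int n, ...)
{
  __builtin_ms_va_list ap;
  int i, s = 0;

  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; i++)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}
#endif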
24320 static void
24321 ix86_init_builtin_types (void)
24322 {
24323 tree float128_type_node, float80_type_node;
24325 /* The __float80 type. */
24326 float80_type_node = long_double_type_node;
24327 if (TYPE_MODE (float80_type_node) != XFmode)
24328 {
24329 /* The __float80 type.  */
24330 float80_type_node = make_node (REAL_TYPE);
24332 TYPE_PRECISION (float80_type_node) = 80;
24333 layout_type (float80_type_node);
24334 }
24335 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
24337 /* The __float128 type. */
24338 float128_type_node = make_node (REAL_TYPE);
24339 TYPE_PRECISION (float128_type_node) = 128;
24340 layout_type (float128_type_node);
24341 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
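/* Illustrative user-level view of the two types registered above (x86
   specific; the literal suffixes below are the documented GCC ones).  */
#if 0
__float80  ext  = 1.5w;		/* 80-bit extended precision, w/W suffix */
__float128 quad = 1.5q;		/* 128-bit quadruple precision, q/Q suffix */
#endif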
24343 /* This macro is built by i386-builtin-types.awk. */
24344 DEFINE_BUILTIN_PRIMITIVE_TYPES;
24345 }
24347 static void
24348 ix86_init_builtins (void)
24349 {
24350 tree t;
24352 ix86_init_builtin_types ();
24354 /* TFmode support builtins. */
24355 def_builtin_const (0, "__builtin_infq",
24356 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
24357 def_builtin_const (0, "__builtin_huge_valq",
24358 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
24360 /* We will expand them to normal calls if SSE2 isn't available, since
24361 they are used by libgcc.  */
24362 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
24363 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
24364 BUILT_IN_MD, "__fabstf2", NULL_TREE);
24365 TREE_READONLY (t) = 1;
24366 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
24368 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
24369 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
24370 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
24371 TREE_READONLY (t) = 1;
24372 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
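/* Illustrative sketch: calling the two TFmode builtins registered above.
   When SSE2 is unavailable they expand to calls to __fabstf2 and
   __copysigntf3 in libgcc, per the comment above.  */
#if 0
static __float128
signed_magnitude (__float128 mag, __float128 sgn)
{
  return __builtin_copysignq (__builtin_fabsq (mag), sgn);
}
#endif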
24374 ix86_init_mmx_sse_builtins ();
24376 if (TARGET_64BIT)
24377 ix86_init_builtins_va_builtins_abi ();
24378 }
24380 /* Return the ix86 builtin for CODE. */
24382 static tree
24383 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24384 {
24385 if (code >= IX86_BUILTIN_MAX)
24386 return error_mark_node;
24388 return ix86_builtins[code];
24389 }
24391 /* Errors in the source file can cause expand_expr to return const0_rtx
24392 where we expect a vector. To avoid crashing, use one of the vector
24393 clear instructions. */
24394 static rtx
24395 safe_vector_operand (rtx x, enum machine_mode mode)
24396 {
24397 if (x == const0_rtx)
24398 x = CONST0_RTX (mode);
24399 return x;
24400 }
24402 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24404 static rtx
24405 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24406 {
24407 rtx pat;
24408 tree arg0 = CALL_EXPR_ARG (exp, 0);
24409 tree arg1 = CALL_EXPR_ARG (exp, 1);
24410 rtx op0 = expand_normal (arg0);
24411 rtx op1 = expand_normal (arg1);
24412 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24413 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24414 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24416 if (VECTOR_MODE_P (mode0))
24417 op0 = safe_vector_operand (op0, mode0);
24418 if (VECTOR_MODE_P (mode1))
24419 op1 = safe_vector_operand (op1, mode1);
24421 if (optimize || !target
24422 || GET_MODE (target) != tmode
24423 || !insn_data[icode].operand[0].predicate (target, tmode))
24424 target = gen_reg_rtx (tmode);
24426 if (GET_MODE (op1) == SImode && mode1 == TImode)
24427 {
24428 rtx x = gen_reg_rtx (V4SImode);
24429 emit_insn (gen_sse2_loadd (x, op1));
24430 op1 = gen_lowpart (TImode, x);
24431 }
24433 if (!insn_data[icode].operand[1].predicate (op0, mode0))
24434 op0 = copy_to_mode_reg (mode0, op0);
24435 if (!insn_data[icode].operand[2].predicate (op1, mode1))
24436 op1 = copy_to_mode_reg (mode1, op1);
24438 pat = GEN_FCN (icode) (target, op0, op1);
24439 if (! pat)
24440 return 0;
24441 emit_insn (pat);
24442 return target;
24443 }
24447 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24449 static rtx
24450 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24451 enum ix86_builtin_func_type m_type,
24452 enum rtx_code sub_code)
24453 {
24454 rtx pat;
24455 int i;
24456 int nargs;
24457 bool comparison_p = false;
24458 bool tf_p = false;
24459 bool last_arg_constant = false;
24460 int num_memory = 0;
24461 struct {
24462 rtx op;
24463 enum machine_mode mode;
24464 } args[4];
24466 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24468 switch (m_type)
24469 {
24470 case MULTI_ARG_4_DF2_DI_I:
24471 case MULTI_ARG_4_DF2_DI_I1:
24472 case MULTI_ARG_4_SF2_SI_I:
24473 case MULTI_ARG_4_SF2_SI_I1:
24474 nargs = 4;
24475 last_arg_constant = true;
24476 break;
24478 case MULTI_ARG_3_SF:
24479 case MULTI_ARG_3_DF:
24480 case MULTI_ARG_3_SF2:
24481 case MULTI_ARG_3_DF2:
24482 case MULTI_ARG_3_DI:
24483 case MULTI_ARG_3_SI:
24484 case MULTI_ARG_3_SI_DI:
24485 case MULTI_ARG_3_HI:
24486 case MULTI_ARG_3_HI_SI:
24487 case MULTI_ARG_3_QI:
24488 case MULTI_ARG_3_DI2:
24489 case MULTI_ARG_3_SI2:
24490 case MULTI_ARG_3_HI2:
24491 case MULTI_ARG_3_QI2:
24492 nargs = 3;
24493 break;
24495 case MULTI_ARG_2_SF:
24496 case MULTI_ARG_2_DF:
24497 case MULTI_ARG_2_DI:
24498 case MULTI_ARG_2_SI:
24499 case MULTI_ARG_2_HI:
24500 case MULTI_ARG_2_QI:
24501 nargs = 2;
24502 break;
24504 case MULTI_ARG_2_DI_IMM:
24505 case MULTI_ARG_2_SI_IMM:
24506 case MULTI_ARG_2_HI_IMM:
24507 case MULTI_ARG_2_QI_IMM:
24508 nargs = 2;
24509 last_arg_constant = true;
24510 break;
24512 case MULTI_ARG_1_SF:
24513 case MULTI_ARG_1_DF:
24514 case MULTI_ARG_1_SF2:
24515 case MULTI_ARG_1_DF2:
24516 case MULTI_ARG_1_DI:
24517 case MULTI_ARG_1_SI:
24518 case MULTI_ARG_1_HI:
24519 case MULTI_ARG_1_QI:
24520 case MULTI_ARG_1_SI_DI:
24521 case MULTI_ARG_1_HI_DI:
24522 case MULTI_ARG_1_HI_SI:
24523 case MULTI_ARG_1_QI_DI:
24524 case MULTI_ARG_1_QI_SI:
24525 case MULTI_ARG_1_QI_HI:
24526 nargs = 1;
24527 break;
24529 case MULTI_ARG_2_DI_CMP:
24530 case MULTI_ARG_2_SI_CMP:
24531 case MULTI_ARG_2_HI_CMP:
24532 case MULTI_ARG_2_QI_CMP:
24533 nargs = 2;
24534 comparison_p = true;
24535 break;
24537 case MULTI_ARG_2_SF_TF:
24538 case MULTI_ARG_2_DF_TF:
24539 case MULTI_ARG_2_DI_TF:
24540 case MULTI_ARG_2_SI_TF:
24541 case MULTI_ARG_2_HI_TF:
24542 case MULTI_ARG_2_QI_TF:
24543 nargs = 2;
24544 tf_p = true;
24545 break;
24547 default:
24548 gcc_unreachable ();
24549 }
24551 if (optimize || !target
24552 || GET_MODE (target) != tmode
24553 || !insn_data[icode].operand[0].predicate (target, tmode))
24554 target = gen_reg_rtx (tmode);
24556 gcc_assert (nargs <= 4);
24558 for (i = 0; i < nargs; i++)
24559 {
24560 tree arg = CALL_EXPR_ARG (exp, i);
24561 rtx op = expand_normal (arg);
24562 int adjust = (comparison_p) ? 1 : 0;
24563 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24565 if (last_arg_constant && i == nargs-1)
24566 {
24567 if (!CONST_INT_P (op))
24568 {
24569 error ("last argument must be an immediate");
24570 return gen_reg_rtx (tmode);
24571 }
24572 }
24573 else
24574 {
24575 if (VECTOR_MODE_P (mode))
24576 op = safe_vector_operand (op, mode);
24578 /* If we aren't optimizing, only allow one memory operand to be
24579 generated.  */
24580 if (memory_operand (op, mode))
24581 num_memory++;
24583 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24585 if (optimize
24586 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24587 || num_memory > 1)
24588 op = force_reg (mode, op);
24589 }
24591 args[i].op = op;
24592 args[i].mode = mode;
24593 }
24595 switch (nargs)
24596 {
24597 case 1:
24598 pat = GEN_FCN (icode) (target, args[0].op);
24599 break;
24601 case 2:
24602 if (tf_p)
24603 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24604 GEN_INT ((int)sub_code));
24605 else if (! comparison_p)
24606 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24607 else
24608 {
24609 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24610 args[0].op,
24611 args[1].op);
24613 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24614 }
24615 break;
24617 case 3:
24618 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24619 break;
24621 case 4:
24622 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24623 break;
24625 default:
24626 gcc_unreachable ();
24627 }
24629 if (! pat)
24630 return 0;
24632 emit_insn (pat);
24633 return target;
24634 }
24636 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24637 insns with vec_merge. */
24639 static rtx
24640 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24641 rtx target)
24642 {
24643 rtx pat;
24644 tree arg0 = CALL_EXPR_ARG (exp, 0);
24645 rtx op1, op0 = expand_normal (arg0);
24646 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24647 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24649 if (optimize || !target
24650 || GET_MODE (target) != tmode
24651 || !insn_data[icode].operand[0].predicate (target, tmode))
24652 target = gen_reg_rtx (tmode);
24654 if (VECTOR_MODE_P (mode0))
24655 op0 = safe_vector_operand (op0, mode0);
24657 if ((optimize && !register_operand (op0, mode0))
24658 || !insn_data[icode].operand[1].predicate (op0, mode0))
24659 op0 = copy_to_mode_reg (mode0, op0);
24661 op1 = op0;
24662 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24663 op1 = copy_to_mode_reg (mode0, op1);
24665 pat = GEN_FCN (icode) (target, op0, op1);
24666 if (! pat)
24667 return 0;
24668 emit_insn (pat);
24669 return target;
24670 }
24672 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24674 static rtx
24675 ix86_expand_sse_compare (const struct builtin_description *d,
24676 tree exp, rtx target, bool swap)
24677 {
24678 rtx pat;
24679 tree arg0 = CALL_EXPR_ARG (exp, 0);
24680 tree arg1 = CALL_EXPR_ARG (exp, 1);
24681 rtx op0 = expand_normal (arg0);
24682 rtx op1 = expand_normal (arg1);
24683 rtx op2;
24684 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24685 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24686 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24687 enum rtx_code comparison = d->comparison;
24689 if (VECTOR_MODE_P (mode0))
24690 op0 = safe_vector_operand (op0, mode0);
24691 if (VECTOR_MODE_P (mode1))
24692 op1 = safe_vector_operand (op1, mode1);
24694 /* Swap operands if we have a comparison that isn't available in
24695 SSE.  */
24696 if (swap)
24697 {
24698 rtx tmp = gen_reg_rtx (mode1);
24699 emit_move_insn (tmp, op1);
24700 op1 = op0;
24701 op0 = tmp;
24702 }
24704 if (optimize || !target
24705 || GET_MODE (target) != tmode
24706 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24707 target = gen_reg_rtx (tmode);
24709 if ((optimize && !register_operand (op0, mode0))
24710 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24711 op0 = copy_to_mode_reg (mode0, op0);
24712 if ((optimize && !register_operand (op1, mode1))
24713 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24714 op1 = copy_to_mode_reg (mode1, op1);
24716 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24717 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24718 if (! pat)
24719 return 0;
24720 emit_insn (pat);
24721 return target;
24722 }
24724 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24726 static rtx
24727 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24728 rtx target)
24729 {
24730 rtx pat;
24731 tree arg0 = CALL_EXPR_ARG (exp, 0);
24732 tree arg1 = CALL_EXPR_ARG (exp, 1);
24733 rtx op0 = expand_normal (arg0);
24734 rtx op1 = expand_normal (arg1);
24735 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24736 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24737 enum rtx_code comparison = d->comparison;
24739 if (VECTOR_MODE_P (mode0))
24740 op0 = safe_vector_operand (op0, mode0);
24741 if (VECTOR_MODE_P (mode1))
24742 op1 = safe_vector_operand (op1, mode1);
24744 /* Swap operands if we have a comparison that isn't available in
24745 SSE.  */
24746 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24747 {
24748 rtx tmp = op1;
24749 op1 = op0;
24750 op0 = tmp;
24751 }
24753 target = gen_reg_rtx (SImode);
24754 emit_move_insn (target, const0_rtx);
24755 target = gen_rtx_SUBREG (QImode, target, 0);
24757 if ((optimize && !register_operand (op0, mode0))
24758 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24759 op0 = copy_to_mode_reg (mode0, op0);
24760 if ((optimize && !register_operand (op1, mode1))
24761 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24762 op1 = copy_to_mode_reg (mode1, op1);
24764 pat = GEN_FCN (d->icode) (op0, op1);
24765 if (! pat)
24766 return 0;
24767 emit_insn (pat);
24768 emit_insn (gen_rtx_SET (VOIDmode,
24769 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24770 gen_rtx_fmt_ee (comparison, QImode,
24771 SET_DEST (pat),
24772 const0_rtx)));
24774 return SUBREG_REG (target);
24775 }
24777 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24779 static rtx
24780 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24781 rtx target)
24782 {
24783 rtx pat;
24784 tree arg0 = CALL_EXPR_ARG (exp, 0);
24785 tree arg1 = CALL_EXPR_ARG (exp, 1);
24786 rtx op0 = expand_normal (arg0);
24787 rtx op1 = expand_normal (arg1);
24788 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24789 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24790 enum rtx_code comparison = d->comparison;
24792 if (VECTOR_MODE_P (mode0))
24793 op0 = safe_vector_operand (op0, mode0);
24794 if (VECTOR_MODE_P (mode1))
24795 op1 = safe_vector_operand (op1, mode1);
24797 target = gen_reg_rtx (SImode);
24798 emit_move_insn (target, const0_rtx);
24799 target = gen_rtx_SUBREG (QImode, target, 0);
24801 if ((optimize && !register_operand (op0, mode0))
24802 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24803 op0 = copy_to_mode_reg (mode0, op0);
24804 if ((optimize && !register_operand (op1, mode1))
24805 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24806 op1 = copy_to_mode_reg (mode1, op1);
24808 pat = GEN_FCN (d->icode) (op0, op1);
24809 if (! pat)
24810 return 0;
24811 emit_insn (pat);
24812 emit_insn (gen_rtx_SET (VOIDmode,
24813 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24814 gen_rtx_fmt_ee (comparison, QImode,
24815 SET_DEST (pat),
24816 const0_rtx)));
24818 return SUBREG_REG (target);
24819 }
24821 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24823 static rtx
24824 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24825 tree exp, rtx target)
24826 {
24827 rtx pat;
24828 tree arg0 = CALL_EXPR_ARG (exp, 0);
24829 tree arg1 = CALL_EXPR_ARG (exp, 1);
24830 tree arg2 = CALL_EXPR_ARG (exp, 2);
24831 tree arg3 = CALL_EXPR_ARG (exp, 3);
24832 tree arg4 = CALL_EXPR_ARG (exp, 4);
24833 rtx scratch0, scratch1;
24834 rtx op0 = expand_normal (arg0);
24835 rtx op1 = expand_normal (arg1);
24836 rtx op2 = expand_normal (arg2);
24837 rtx op3 = expand_normal (arg3);
24838 rtx op4 = expand_normal (arg4);
24839 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24841 tmode0 = insn_data[d->icode].operand[0].mode;
24842 tmode1 = insn_data[d->icode].operand[1].mode;
24843 modev2 = insn_data[d->icode].operand[2].mode;
24844 modei3 = insn_data[d->icode].operand[3].mode;
24845 modev4 = insn_data[d->icode].operand[4].mode;
24846 modei5 = insn_data[d->icode].operand[5].mode;
24847 modeimm = insn_data[d->icode].operand[6].mode;
24849 if (VECTOR_MODE_P (modev2))
24850 op0 = safe_vector_operand (op0, modev2);
24851 if (VECTOR_MODE_P (modev4))
24852 op2 = safe_vector_operand (op2, modev4);
24854 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24855 op0 = copy_to_mode_reg (modev2, op0);
24856 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24857 op1 = copy_to_mode_reg (modei3, op1);
24858 if ((optimize && !register_operand (op2, modev4))
24859 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24860 op2 = copy_to_mode_reg (modev4, op2);
24861 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24862 op3 = copy_to_mode_reg (modei5, op3);
24864 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24865 {
24866 error ("the fifth argument must be an 8-bit immediate");
24867 return const0_rtx;
24868 }
24870 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24872 if (optimize || !target
24873 || GET_MODE (target) != tmode0
24874 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24875 target = gen_reg_rtx (tmode0);
24877 scratch1 = gen_reg_rtx (tmode1);
24879 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24880 }
24881 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24882 {
24883 if (optimize || !target
24884 || GET_MODE (target) != tmode1
24885 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24886 target = gen_reg_rtx (tmode1);
24888 scratch0 = gen_reg_rtx (tmode0);
24890 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24891 }
24892 else
24893 {
24894 gcc_assert (d->flag);
24896 scratch0 = gen_reg_rtx (tmode0);
24897 scratch1 = gen_reg_rtx (tmode1);
24899 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24900 }
24902 if (! pat)
24903 return 0;
24905 emit_insn (pat);
24907 if (d->flag)
24908 {
24909 target = gen_reg_rtx (SImode);
24910 emit_move_insn (target, const0_rtx);
24911 target = gen_rtx_SUBREG (QImode, target, 0);
24913 emit_insn
24914 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24915 gen_rtx_fmt_ee (EQ, QImode,
24916 gen_rtx_REG ((enum machine_mode) d->flag,
24917 FLAGS_REG),
24918 const0_rtx)));
24919 return SUBREG_REG (target);
24920 }
24921 else
24922 return target;
24923 }
24926 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24928 static rtx
24929 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24930 tree exp, rtx target)
24931 {
24932 rtx pat;
24933 tree arg0 = CALL_EXPR_ARG (exp, 0);
24934 tree arg1 = CALL_EXPR_ARG (exp, 1);
24935 tree arg2 = CALL_EXPR_ARG (exp, 2);
24936 rtx scratch0, scratch1;
24937 rtx op0 = expand_normal (arg0);
24938 rtx op1 = expand_normal (arg1);
24939 rtx op2 = expand_normal (arg2);
24940 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24942 tmode0 = insn_data[d->icode].operand[0].mode;
24943 tmode1 = insn_data[d->icode].operand[1].mode;
24944 modev2 = insn_data[d->icode].operand[2].mode;
24945 modev3 = insn_data[d->icode].operand[3].mode;
24946 modeimm = insn_data[d->icode].operand[4].mode;
24948 if (VECTOR_MODE_P (modev2))
24949 op0 = safe_vector_operand (op0, modev2);
24950 if (VECTOR_MODE_P (modev3))
24951 op1 = safe_vector_operand (op1, modev3);
24953 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24954 op0 = copy_to_mode_reg (modev2, op0);
24955 if ((optimize && !register_operand (op1, modev3))
24956 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24957 op1 = copy_to_mode_reg (modev3, op1);
24959 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24960 {
24961 error ("the third argument must be an 8-bit immediate");
24962 return const0_rtx;
24963 }
24965 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24967 if (optimize || !target
24968 || GET_MODE (target) != tmode0
24969 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24970 target = gen_reg_rtx (tmode0);
24972 scratch1 = gen_reg_rtx (tmode1);
24974 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24975 }
24976 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24977 {
24978 if (optimize || !target
24979 || GET_MODE (target) != tmode1
24980 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24981 target = gen_reg_rtx (tmode1);
24983 scratch0 = gen_reg_rtx (tmode0);
24985 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24986 }
24987 else
24988 {
24989 gcc_assert (d->flag);
24991 scratch0 = gen_reg_rtx (tmode0);
24992 scratch1 = gen_reg_rtx (tmode1);
24994 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
25004 target = gen_reg_rtx (SImode);
25005 emit_move_insn (target, const0_rtx);
25006 target = gen_rtx_SUBREG (QImode, target, 0);
25009 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25010 gen_rtx_fmt_ee (EQ, QImode,
25011 gen_rtx_REG ((enum machine_mode) d->flag,
25014 return SUBREG_REG (target);
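/* Illustrative user-level sketch, not part of the original file: the two
   expanders above back the SSE4.2 string intrinsics from <nmmintrin.h>.
   The control argument must be a compile-time 8-bit immediate, which is
   why a non-constant value reaches the error paths above.  A minimal
   use, assuming SSE4.2 is enabled: */
#if 0
#include <nmmintrin.h>

static int
first_match (__m128i needle, __m128i haystack)
{
  /* Unsigned byte elements, "equal any" aggregation; this expands
     through ix86_expand_sse_pcmpistr to a pcmpistri instruction.  */
  return _mm_cmpistri (needle, haystack,
		       _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}
#endif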
25020 /* Subroutine of ix86_expand_builtin to take care of insns with
25021 variable number of operands. */
25024 ix86_expand_args_builtin (const struct builtin_description *d,
25025 tree exp, rtx target)
25027 rtx pat, real_target;
25028 unsigned int i, nargs;
25029 unsigned int nargs_constant = 0;
25030 int num_memory = 0;
25034 enum machine_mode mode;
25036 bool last_arg_count = false;
25037 enum insn_code icode = d->icode;
25038 const struct insn_data_d *insn_p = &insn_data[icode];
25039 enum machine_mode tmode = insn_p->operand[0].mode;
25040 enum machine_mode rmode = VOIDmode;
25042 enum rtx_code comparison = d->comparison;
25044 switch ((enum ix86_builtin_func_type) d->flag)
25046 case INT_FTYPE_V8SF_V8SF_PTEST:
25047 case INT_FTYPE_V4DI_V4DI_PTEST:
25048 case INT_FTYPE_V4DF_V4DF_PTEST:
25049 case INT_FTYPE_V4SF_V4SF_PTEST:
25050 case INT_FTYPE_V2DI_V2DI_PTEST:
25051 case INT_FTYPE_V2DF_V2DF_PTEST:
25052 return ix86_expand_sse_ptest (d, exp, target);
25053 case FLOAT128_FTYPE_FLOAT128:
25054 case FLOAT_FTYPE_FLOAT:
25055 case INT_FTYPE_INT:
25056 case UINT64_FTYPE_INT:
25057 case UINT16_FTYPE_UINT16:
25058 case INT64_FTYPE_INT64:
25059 case INT64_FTYPE_V4SF:
25060 case INT64_FTYPE_V2DF:
25061 case INT_FTYPE_V16QI:
25062 case INT_FTYPE_V8QI:
25063 case INT_FTYPE_V8SF:
25064 case INT_FTYPE_V4DF:
25065 case INT_FTYPE_V4SF:
25066 case INT_FTYPE_V2DF:
25067 case V16QI_FTYPE_V16QI:
25068 case V8SI_FTYPE_V8SF:
25069 case V8SI_FTYPE_V4SI:
25070 case V8HI_FTYPE_V8HI:
25071 case V8HI_FTYPE_V16QI:
25072 case V8QI_FTYPE_V8QI:
25073 case V8SF_FTYPE_V8SF:
25074 case V8SF_FTYPE_V8SI:
25075 case V8SF_FTYPE_V4SF:
25076 case V8SF_FTYPE_V8HI:
25077 case V4SI_FTYPE_V4SI:
25078 case V4SI_FTYPE_V16QI:
25079 case V4SI_FTYPE_V4SF:
25080 case V4SI_FTYPE_V8SI:
25081 case V4SI_FTYPE_V8HI:
25082 case V4SI_FTYPE_V4DF:
25083 case V4SI_FTYPE_V2DF:
25084 case V4HI_FTYPE_V4HI:
25085 case V4DF_FTYPE_V4DF:
25086 case V4DF_FTYPE_V4SI:
25087 case V4DF_FTYPE_V4SF:
25088 case V4DF_FTYPE_V2DF:
25089 case V4SF_FTYPE_V4SF:
25090 case V4SF_FTYPE_V4SI:
25091 case V4SF_FTYPE_V8SF:
25092 case V4SF_FTYPE_V4DF:
25093 case V4SF_FTYPE_V8HI:
25094 case V4SF_FTYPE_V2DF:
25095 case V2DI_FTYPE_V2DI:
25096 case V2DI_FTYPE_V16QI:
25097 case V2DI_FTYPE_V8HI:
25098 case V2DI_FTYPE_V4SI:
25099 case V2DF_FTYPE_V2DF:
25100 case V2DF_FTYPE_V4SI:
25101 case V2DF_FTYPE_V4DF:
25102 case V2DF_FTYPE_V4SF:
25103 case V2DF_FTYPE_V2SI:
25104 case V2SI_FTYPE_V2SI:
25105 case V2SI_FTYPE_V4SF:
25106 case V2SI_FTYPE_V2SF:
25107 case V2SI_FTYPE_V2DF:
25108 case V2SF_FTYPE_V2SF:
25109 case V2SF_FTYPE_V2SI:
25112 case V4SF_FTYPE_V4SF_VEC_MERGE:
25113 case V2DF_FTYPE_V2DF_VEC_MERGE:
25114 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25115 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25116 case V16QI_FTYPE_V16QI_V16QI:
25117 case V16QI_FTYPE_V8HI_V8HI:
25118 case V8QI_FTYPE_V8QI_V8QI:
25119 case V8QI_FTYPE_V4HI_V4HI:
25120 case V8HI_FTYPE_V8HI_V8HI:
25121 case V8HI_FTYPE_V16QI_V16QI:
25122 case V8HI_FTYPE_V4SI_V4SI:
25123 case V8SF_FTYPE_V8SF_V8SF:
25124 case V8SF_FTYPE_V8SF_V8SI:
25125 case V4SI_FTYPE_V4SI_V4SI:
25126 case V4SI_FTYPE_V8HI_V8HI:
25127 case V4SI_FTYPE_V4SF_V4SF:
25128 case V4SI_FTYPE_V2DF_V2DF:
25129 case V4HI_FTYPE_V4HI_V4HI:
25130 case V4HI_FTYPE_V8QI_V8QI:
25131 case V4HI_FTYPE_V2SI_V2SI:
25132 case V4DF_FTYPE_V4DF_V4DF:
25133 case V4DF_FTYPE_V4DF_V4DI:
25134 case V4SF_FTYPE_V4SF_V4SF:
25135 case V4SF_FTYPE_V4SF_V4SI:
25136 case V4SF_FTYPE_V4SF_V2SI:
25137 case V4SF_FTYPE_V4SF_V2DF:
25138 case V4SF_FTYPE_V4SF_DI:
25139 case V4SF_FTYPE_V4SF_SI:
25140 case V2DI_FTYPE_V2DI_V2DI:
25141 case V2DI_FTYPE_V16QI_V16QI:
25142 case V2DI_FTYPE_V4SI_V4SI:
25143 case V2DI_FTYPE_V2DI_V16QI:
25144 case V2DI_FTYPE_V2DF_V2DF:
25145 case V2SI_FTYPE_V2SI_V2SI:
25146 case V2SI_FTYPE_V4HI_V4HI:
25147 case V2SI_FTYPE_V2SF_V2SF:
25148 case V2DF_FTYPE_V2DF_V2DF:
25149 case V2DF_FTYPE_V2DF_V4SF:
25150 case V2DF_FTYPE_V2DF_V2DI:
25151 case V2DF_FTYPE_V2DF_DI:
25152 case V2DF_FTYPE_V2DF_SI:
25153 case V2SF_FTYPE_V2SF_V2SF:
25154 case V1DI_FTYPE_V1DI_V1DI:
25155 case V1DI_FTYPE_V8QI_V8QI:
25156 case V1DI_FTYPE_V2SI_V2SI:
25157 if (comparison == UNKNOWN)
25158 return ix86_expand_binop_builtin (icode, exp, target);
25161 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25162 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25163 gcc_assert (comparison != UNKNOWN);
25167 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25168 case V8HI_FTYPE_V8HI_SI_COUNT:
25169 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25170 case V4SI_FTYPE_V4SI_SI_COUNT:
25171 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25172 case V4HI_FTYPE_V4HI_SI_COUNT:
25173 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25174 case V2DI_FTYPE_V2DI_SI_COUNT:
25175 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25176 case V2SI_FTYPE_V2SI_SI_COUNT:
25177 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25178 case V1DI_FTYPE_V1DI_SI_COUNT:
25180 last_arg_count = true;
25182 case UINT64_FTYPE_UINT64_UINT64:
25183 case UINT_FTYPE_UINT_UINT:
25184 case UINT_FTYPE_UINT_USHORT:
25185 case UINT_FTYPE_UINT_UCHAR:
25186 case UINT16_FTYPE_UINT16_INT:
25187 case UINT8_FTYPE_UINT8_INT:
25190 case V2DI_FTYPE_V2DI_INT_CONVERT:
25193 nargs_constant = 1;
25195 case V8HI_FTYPE_V8HI_INT:
25196 case V8HI_FTYPE_V8SF_INT:
25197 case V8HI_FTYPE_V4SF_INT:
25198 case V8SF_FTYPE_V8SF_INT:
25199 case V4SI_FTYPE_V4SI_INT:
25200 case V4SI_FTYPE_V8SI_INT:
25201 case V4HI_FTYPE_V4HI_INT:
25202 case V4DF_FTYPE_V4DF_INT:
25203 case V4SF_FTYPE_V4SF_INT:
25204 case V4SF_FTYPE_V8SF_INT:
25205 case V2DI_FTYPE_V2DI_INT:
25206 case V2DF_FTYPE_V2DF_INT:
25207 case V2DF_FTYPE_V4DF_INT:
25209 nargs_constant = 1;
25211 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25212 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25213 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25214 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25215 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25218 case V16QI_FTYPE_V16QI_V16QI_INT:
25219 case V8HI_FTYPE_V8HI_V8HI_INT:
25220 case V8SI_FTYPE_V8SI_V8SI_INT:
25221 case V8SI_FTYPE_V8SI_V4SI_INT:
25222 case V8SF_FTYPE_V8SF_V8SF_INT:
25223 case V8SF_FTYPE_V8SF_V4SF_INT:
25224 case V4SI_FTYPE_V4SI_V4SI_INT:
25225 case V4DF_FTYPE_V4DF_V4DF_INT:
25226 case V4DF_FTYPE_V4DF_V2DF_INT:
25227 case V4SF_FTYPE_V4SF_V4SF_INT:
25228 case V2DI_FTYPE_V2DI_V2DI_INT:
25229 case V2DF_FTYPE_V2DF_V2DF_INT:
25231 nargs_constant = 1;
25233 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25236 nargs_constant = 1;
25238 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25241 nargs_constant = 1;
25243 case V2DI_FTYPE_V2DI_UINT_UINT:
25245 nargs_constant = 2;
25247 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25248 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25249 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25250 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25252 nargs_constant = 1;
25254 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25256 nargs_constant = 2;
25259 gcc_unreachable ();
25262 gcc_assert (nargs <= ARRAY_SIZE (args));
25264 if (comparison != UNKNOWN)
25266 gcc_assert (nargs == 2);
25267 return ix86_expand_sse_compare (d, exp, target, swap);
25270 if (rmode == VOIDmode || rmode == tmode)
25274 || GET_MODE (target) != tmode
25275 || !insn_p->operand[0].predicate (target, tmode))
25276 target = gen_reg_rtx (tmode);
25277 real_target = target;
25281 target = gen_reg_rtx (rmode);
25282 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25285 for (i = 0; i < nargs; i++)
25287 tree arg = CALL_EXPR_ARG (exp, i);
25288 rtx op = expand_normal (arg);
25289 enum machine_mode mode = insn_p->operand[i + 1].mode;
25290 bool match = insn_p->operand[i + 1].predicate (op, mode);
25292 if (last_arg_count && (i + 1) == nargs)
25294 /* SIMD shift insns take either an 8-bit immediate or a
25295 register as the count. But the builtin functions take int as
25296 the count. If the count doesn't match, we put it in a register. */
25299 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25300 if (!insn_p->operand[i + 1].predicate (op, mode))
25301 op = copy_to_reg (op);
25304 else if ((nargs - i) <= nargs_constant)
25309 case CODE_FOR_sse4_1_roundpd:
25310 case CODE_FOR_sse4_1_roundps:
25311 case CODE_FOR_sse4_1_roundsd:
25312 case CODE_FOR_sse4_1_roundss:
25313 case CODE_FOR_sse4_1_blendps:
25314 case CODE_FOR_avx_blendpd256:
25315 case CODE_FOR_avx_vpermilv4df:
25316 case CODE_FOR_avx_roundpd256:
25317 case CODE_FOR_avx_roundps256:
25318 error ("the last argument must be a 4-bit immediate");
25321 case CODE_FOR_sse4_1_blendpd:
25322 case CODE_FOR_avx_vpermilv2df:
25323 case CODE_FOR_xop_vpermil2v2df3:
25324 case CODE_FOR_xop_vpermil2v4sf3:
25325 case CODE_FOR_xop_vpermil2v4df3:
25326 case CODE_FOR_xop_vpermil2v8sf3:
25327 error ("the last argument must be a 2-bit immediate");
25330 case CODE_FOR_avx_vextractf128v4df:
25331 case CODE_FOR_avx_vextractf128v8sf:
25332 case CODE_FOR_avx_vextractf128v8si:
25333 case CODE_FOR_avx_vinsertf128v4df:
25334 case CODE_FOR_avx_vinsertf128v8sf:
25335 case CODE_FOR_avx_vinsertf128v8si:
25336 error ("the last argument must be a 1-bit immediate");
25339 case CODE_FOR_avx_cmpsdv2df3:
25340 case CODE_FOR_avx_cmpssv4sf3:
25341 case CODE_FOR_avx_cmppdv2df3:
25342 case CODE_FOR_avx_cmppsv4sf3:
25343 case CODE_FOR_avx_cmppdv4df3:
25344 case CODE_FOR_avx_cmppsv8sf3:
25345 error ("the last argument must be a 5-bit immediate");
25349 switch (nargs_constant)
25352 if ((nargs - i) == nargs_constant)
25354 error ("the next to last argument must be an 8-bit immediate");
25358 error ("the last argument must be an 8-bit immediate");
25361 gcc_unreachable ();
25368 if (VECTOR_MODE_P (mode))
25369 op = safe_vector_operand (op, mode);
25371 /* If we aren't optimizing, only allow one memory operand to
25372 be generated. */
25373 if (memory_operand (op, mode))
25376 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25378 if (optimize || !match || num_memory > 1)
25379 op = copy_to_mode_reg (mode, op);
25383 op = copy_to_reg (op);
25384 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25389 args[i].mode = mode;
25395 pat = GEN_FCN (icode) (real_target, args[0].op);
25398 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25401 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25405 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25406 args[2].op, args[3].op);
25409 gcc_unreachable ();
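/* Illustrative user-level sketch, not part of the original file: the
   nargs_constant handling above is what diagnoses out-of-range
   immediates.  CODE_FOR_sse4_1_roundpd, for instance, accepts only a
   4-bit immediate, so with SSE4.1 enabled the first call below expands
   normally while the second hits "the last argument must be a 4-bit
   immediate": */
#if 0
#include <smmintrin.h>

static __m128d
round_ok (__m128d x)
{
  return _mm_round_pd (x, _MM_FROUND_TO_NEAREST_INT); /* constant imm */
}

static __m128d
round_bad (__m128d x, int mode)
{
  return _mm_round_pd (x, mode); /* non-constant: rejected at expand time */
}
#endif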
25419 /* Subroutine of ix86_expand_builtin to take care of special insns
25420 with variable number of operands. */
25423 ix86_expand_special_args_builtin (const struct builtin_description *d,
25424 tree exp, rtx target)
25428 unsigned int i, nargs, arg_adjust, memory;
25432 enum machine_mode mode;
25434 enum insn_code icode = d->icode;
25435 bool last_arg_constant = false;
25436 const struct insn_data_d *insn_p = &insn_data[icode];
25437 enum machine_mode tmode = insn_p->operand[0].mode;
25438 enum { load, store } klass;
25440 switch ((enum ix86_builtin_func_type) d->flag)
25442 case VOID_FTYPE_VOID:
25443 emit_insn (GEN_FCN (icode) (target));
25445 case VOID_FTYPE_UINT64:
25446 case VOID_FTYPE_UNSIGNED:
25452 case UINT64_FTYPE_VOID:
25453 case UNSIGNED_FTYPE_VOID:
25454 case UINT16_FTYPE_VOID:
25459 case UINT64_FTYPE_PUNSIGNED:
25460 case V2DI_FTYPE_PV2DI:
25461 case V32QI_FTYPE_PCCHAR:
25462 case V16QI_FTYPE_PCCHAR:
25463 case V8SF_FTYPE_PCV4SF:
25464 case V8SF_FTYPE_PCFLOAT:
25465 case V4SF_FTYPE_PCFLOAT:
25466 case V4DF_FTYPE_PCV2DF:
25467 case V4DF_FTYPE_PCDOUBLE:
25468 case V2DF_FTYPE_PCDOUBLE:
25469 case VOID_FTYPE_PVOID:
25474 case VOID_FTYPE_PV2SF_V4SF:
25475 case VOID_FTYPE_PV4DI_V4DI:
25476 case VOID_FTYPE_PV2DI_V2DI:
25477 case VOID_FTYPE_PCHAR_V32QI:
25478 case VOID_FTYPE_PCHAR_V16QI:
25479 case VOID_FTYPE_PFLOAT_V8SF:
25480 case VOID_FTYPE_PFLOAT_V4SF:
25481 case VOID_FTYPE_PDOUBLE_V4DF:
25482 case VOID_FTYPE_PDOUBLE_V2DF:
25483 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25484 case VOID_FTYPE_PINT_INT:
25487 /* Reserve memory operand for target. */
25488 memory = ARRAY_SIZE (args);
25490 case V4SF_FTYPE_V4SF_PCV2SF:
25491 case V2DF_FTYPE_V2DF_PCDOUBLE:
25496 case V8SF_FTYPE_PCV8SF_V8SF:
25497 case V4DF_FTYPE_PCV4DF_V4DF:
25498 case V4SF_FTYPE_PCV4SF_V4SF:
25499 case V2DF_FTYPE_PCV2DF_V2DF:
25504 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25505 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25506 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25507 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25510 /* Reserve memory operand for target. */
25511 memory = ARRAY_SIZE (args);
25513 case VOID_FTYPE_UINT_UINT_UINT:
25514 case VOID_FTYPE_UINT64_UINT_UINT:
25515 case UCHAR_FTYPE_UINT_UINT_UINT:
25516 case UCHAR_FTYPE_UINT64_UINT_UINT:
25519 memory = ARRAY_SIZE (args);
25520 last_arg_constant = true;
25523 gcc_unreachable ();
25526 gcc_assert (nargs <= ARRAY_SIZE (args));
25528 if (klass == store)
25530 arg = CALL_EXPR_ARG (exp, 0);
25531 op = expand_normal (arg);
25532 gcc_assert (target == 0);
25534 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25536 target = force_reg (tmode, op);
25544 || GET_MODE (target) != tmode
25545 || !insn_p->operand[0].predicate (target, tmode))
25546 target = gen_reg_rtx (tmode);
25549 for (i = 0; i < nargs; i++)
25551 enum machine_mode mode = insn_p->operand[i + 1].mode;
25554 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25555 op = expand_normal (arg);
25556 match = insn_p->operand[i + 1].predicate (op, mode);
25558 if (last_arg_constant && (i + 1) == nargs)
25562 if (icode == CODE_FOR_lwp_lwpvalsi3
25563 || icode == CODE_FOR_lwp_lwpinssi3
25564 || icode == CODE_FOR_lwp_lwpvaldi3
25565 || icode == CODE_FOR_lwp_lwpinsdi3)
25566 error ("the last argument must be a 32-bit immediate");
25568 error ("the last argument must be an 8-bit immediate");
25576 /* This must be the memory operand. */
25577 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25578 gcc_assert (GET_MODE (op) == mode
25579 || GET_MODE (op) == VOIDmode);
25583 /* This must be a register. */
25584 if (VECTOR_MODE_P (mode))
25585 op = safe_vector_operand (op, mode);
25587 gcc_assert (GET_MODE (op) == mode
25588 || GET_MODE (op) == VOIDmode);
25589 op = copy_to_mode_reg (mode, op);
25594 args[i].mode = mode;
25600 pat = GEN_FCN (icode) (target);
25603 pat = GEN_FCN (icode) (target, args[0].op);
25606 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25609 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25612 gcc_unreachable ();
25618 return klass == store ? 0 : target;
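/* Illustrative user-level sketch, not part of the original file: the
   load/store "klass" split above covers intrinsics such as the
   unaligned SSE moves; V4SF_FTYPE_PCFLOAT is expanded as a load and
   VOID_FTYPE_PFLOAT_V4SF as a store: */
#if 0
#include <xmmintrin.h>

static void
copy4 (float *dst, const float *src)
{
  __m128 v = _mm_loadu_ps (src);	/* klass == load */
  _mm_storeu_ps (dst, v);		/* klass == store */
}
#endif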
25621 /* Return the integer constant in ARG. Constrain it to be in the range
25622 of the subparts of VEC_TYPE; issue an error if not. */
25625 get_element_number (tree vec_type, tree arg)
25627 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25629 if (!host_integerp (arg, 1)
25630 || (elt = tree_low_cst (arg, 1), elt > max))
25632 error ("selector must be an integer constant in the range 0..%wi", max);
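/* Illustrative user-level sketch, not part of the original file: the
   selector validated above must be a compile-time constant inside the
   vector, so for a V4SF argument only 0..3 are accepted: */
#if 0
#include <xmmintrin.h>

static float
third_lane (__m128 v)
{
  /* A variable index here would trigger the error above.  */
  return __builtin_ia32_vec_ext_v4sf ((__v4sf) v, 2);
}
#endif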
25639 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25640 ix86_expand_vector_init. We DO have language-level syntax for this, in
25641 the form of (type){ init-list }. Except that since we can't place emms
25642 instructions from inside the compiler, we can't allow the use of MMX
25643 registers unless the user explicitly asks for it. So we do *not* define
25644 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25645 we have builtins invoked by mmintrin.h that give us license to emit
25646 these sorts of instructions. */
25649 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25651 enum machine_mode tmode = TYPE_MODE (type);
25652 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25653 int i, n_elt = GET_MODE_NUNITS (tmode);
25654 rtvec v = rtvec_alloc (n_elt);
25656 gcc_assert (VECTOR_MODE_P (tmode));
25657 gcc_assert (call_expr_nargs (exp) == n_elt);
25659 for (i = 0; i < n_elt; ++i)
25661 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25662 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25665 if (!target || !register_operand (target, tmode))
25666 target = gen_reg_rtx (tmode);
25668 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
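/* Illustrative user-level sketch, not part of the original file: the MMX
   vec_init builtins are reached through <mmintrin.h>; _mm_set_pi32, for
   example, is implemented with __builtin_ia32_vec_init_v2si and arrives
   here as IX86_BUILTIN_VEC_INIT_V2SI: */
#if 0
#include <mmintrin.h>

static __m64
pair (int hi, int lo)
{
  return _mm_set_pi32 (hi, lo);	/* expands via ix86_expand_vector_init */
}
#endif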
25672 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25673 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25674 had a language-level syntax for referencing vector elements. */
25677 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25679 enum machine_mode tmode, mode0;
25684 arg0 = CALL_EXPR_ARG (exp, 0);
25685 arg1 = CALL_EXPR_ARG (exp, 1);
25687 op0 = expand_normal (arg0);
25688 elt = get_element_number (TREE_TYPE (arg0), arg1);
25690 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25691 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25692 gcc_assert (VECTOR_MODE_P (mode0));
25694 op0 = force_reg (mode0, op0);
25696 if (optimize || !target || !register_operand (target, tmode))
25697 target = gen_reg_rtx (tmode);
25699 ix86_expand_vector_extract (true, target, op0, elt);
25704 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25705 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25706 a language-level syntax for referencing vector elements. */
25709 ix86_expand_vec_set_builtin (tree exp)
25711 enum machine_mode tmode, mode1;
25712 tree arg0, arg1, arg2;
25714 rtx op0, op1, target;
25716 arg0 = CALL_EXPR_ARG (exp, 0);
25717 arg1 = CALL_EXPR_ARG (exp, 1);
25718 arg2 = CALL_EXPR_ARG (exp, 2);
25720 tmode = TYPE_MODE (TREE_TYPE (arg0));
25721 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25722 gcc_assert (VECTOR_MODE_P (tmode));
25724 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25725 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25726 elt = get_element_number (TREE_TYPE (arg0), arg2);
25728 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25729 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25731 op0 = force_reg (tmode, op0);
25732 op1 = force_reg (mode1, op1);
25734 /* OP0 is the source of these builtin functions and shouldn't be
25735 modified. Create a copy, use it and return it as target. */
25736 target = gen_reg_rtx (tmode);
25737 emit_move_insn (target, op0);
25738 ix86_expand_vector_set (true, target, op1, elt);
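/* Illustrative user-level sketch, not part of the original file: because
   the expander above inserts into a fresh copy of OP0, the vector
   argument of an insert intrinsic is never clobbered: */
#if 0
#include <smmintrin.h>

static __m128i
with_lane0 (__m128i v, int x)
{
  /* V itself is unchanged; the builtin returns the modified copy.  */
  return _mm_insert_epi32 (v, x, 0);
}
#endif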
25743 /* Expand an expression EXP that calls a built-in function,
25744 with result going to TARGET if that's convenient
25745 (and in mode MODE if that's convenient).
25746 SUBTARGET may be used as the target for computing one of EXP's operands.
25747 IGNORE is nonzero if the value is to be ignored. */
25750 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25751 enum machine_mode mode ATTRIBUTE_UNUSED,
25752 int ignore ATTRIBUTE_UNUSED)
25754 const struct builtin_description *d;
25756 enum insn_code icode;
25757 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25758 tree arg0, arg1, arg2;
25759 rtx op0, op1, op2, pat;
25760 enum machine_mode mode0, mode1, mode2;
25761 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25763 /* Determine whether the builtin function is available under the current ISA.
25764 Originally the builtin was not created if it wasn't applicable to the
25765 current ISA based on the command line switches. With function specific
25766 options, we need to check in the context of the function making the call
25767 whether it is supported. */
25768 if (ix86_builtins_isa[fcode].isa
25769 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25771 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25772 NULL, NULL, false);
25775 error ("%qE needs unknown isa option", fndecl);
25778 gcc_assert (opts != NULL);
25779 error ("%qE needs isa option %s", fndecl, opts);
25787 case IX86_BUILTIN_MASKMOVQ:
25788 case IX86_BUILTIN_MASKMOVDQU:
25789 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25790 ? CODE_FOR_mmx_maskmovq
25791 : CODE_FOR_sse2_maskmovdqu);
25792 /* Note the arg order is different from the operand order. */
25793 arg1 = CALL_EXPR_ARG (exp, 0);
25794 arg2 = CALL_EXPR_ARG (exp, 1);
25795 arg0 = CALL_EXPR_ARG (exp, 2);
25796 op0 = expand_normal (arg0);
25797 op1 = expand_normal (arg1);
25798 op2 = expand_normal (arg2);
25799 mode0 = insn_data[icode].operand[0].mode;
25800 mode1 = insn_data[icode].operand[1].mode;
25801 mode2 = insn_data[icode].operand[2].mode;
25803 op0 = force_reg (Pmode, op0);
25804 op0 = gen_rtx_MEM (mode1, op0);
25806 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25807 op0 = copy_to_mode_reg (mode0, op0);
25808 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25809 op1 = copy_to_mode_reg (mode1, op1);
25810 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25811 op2 = copy_to_mode_reg (mode2, op2);
25812 pat = GEN_FCN (icode) (op0, op1, op2);
25818 case IX86_BUILTIN_LDMXCSR:
25819 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25820 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25821 emit_move_insn (target, op0);
25822 emit_insn (gen_sse_ldmxcsr (target));
25825 case IX86_BUILTIN_STMXCSR:
25826 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25827 emit_insn (gen_sse_stmxcsr (target));
25828 return copy_to_mode_reg (SImode, target);
25830 case IX86_BUILTIN_CLFLUSH:
25831 arg0 = CALL_EXPR_ARG (exp, 0);
25832 op0 = expand_normal (arg0);
25833 icode = CODE_FOR_sse2_clflush;
25834 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25835 op0 = copy_to_mode_reg (Pmode, op0);
25837 emit_insn (gen_sse2_clflush (op0));
25840 case IX86_BUILTIN_MONITOR:
25841 arg0 = CALL_EXPR_ARG (exp, 0);
25842 arg1 = CALL_EXPR_ARG (exp, 1);
25843 arg2 = CALL_EXPR_ARG (exp, 2);
25844 op0 = expand_normal (arg0);
25845 op1 = expand_normal (arg1);
25846 op2 = expand_normal (arg2);
25848 op0 = copy_to_mode_reg (Pmode, op0);
25850 op1 = copy_to_mode_reg (SImode, op1);
25852 op2 = copy_to_mode_reg (SImode, op2);
25853 emit_insn (ix86_gen_monitor (op0, op1, op2));
25856 case IX86_BUILTIN_MWAIT:
25857 arg0 = CALL_EXPR_ARG (exp, 0);
25858 arg1 = CALL_EXPR_ARG (exp, 1);
25859 op0 = expand_normal (arg0);
25860 op1 = expand_normal (arg1);
25862 op0 = copy_to_mode_reg (SImode, op0);
25864 op1 = copy_to_mode_reg (SImode, op1);
25865 emit_insn (gen_sse3_mwait (op0, op1));
25868 case IX86_BUILTIN_VEC_INIT_V2SI:
25869 case IX86_BUILTIN_VEC_INIT_V4HI:
25870 case IX86_BUILTIN_VEC_INIT_V8QI:
25871 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25873 case IX86_BUILTIN_VEC_EXT_V2DF:
25874 case IX86_BUILTIN_VEC_EXT_V2DI:
25875 case IX86_BUILTIN_VEC_EXT_V4SF:
25876 case IX86_BUILTIN_VEC_EXT_V4SI:
25877 case IX86_BUILTIN_VEC_EXT_V8HI:
25878 case IX86_BUILTIN_VEC_EXT_V2SI:
25879 case IX86_BUILTIN_VEC_EXT_V4HI:
25880 case IX86_BUILTIN_VEC_EXT_V16QI:
25881 return ix86_expand_vec_ext_builtin (exp, target);
25883 case IX86_BUILTIN_VEC_SET_V2DI:
25884 case IX86_BUILTIN_VEC_SET_V4SF:
25885 case IX86_BUILTIN_VEC_SET_V4SI:
25886 case IX86_BUILTIN_VEC_SET_V8HI:
25887 case IX86_BUILTIN_VEC_SET_V4HI:
25888 case IX86_BUILTIN_VEC_SET_V16QI:
25889 return ix86_expand_vec_set_builtin (exp);
25891 case IX86_BUILTIN_VEC_PERM_V2DF:
25892 case IX86_BUILTIN_VEC_PERM_V4SF:
25893 case IX86_BUILTIN_VEC_PERM_V2DI:
25894 case IX86_BUILTIN_VEC_PERM_V4SI:
25895 case IX86_BUILTIN_VEC_PERM_V8HI:
25896 case IX86_BUILTIN_VEC_PERM_V16QI:
25897 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25898 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25899 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25900 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25901 case IX86_BUILTIN_VEC_PERM_V4DF:
25902 case IX86_BUILTIN_VEC_PERM_V8SF:
25903 return ix86_expand_vec_perm_builtin (exp);
25905 case IX86_BUILTIN_INFQ:
25906 case IX86_BUILTIN_HUGE_VALQ:
25908 REAL_VALUE_TYPE inf;
25912 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25914 tmp = validize_mem (force_const_mem (mode, tmp));
25917 target = gen_reg_rtx (mode);
25919 emit_move_insn (target, tmp);
25923 case IX86_BUILTIN_LLWPCB:
25924 arg0 = CALL_EXPR_ARG (exp, 0);
25925 op0 = expand_normal (arg0);
25926 icode = CODE_FOR_lwp_llwpcb;
25927 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25928 op0 = copy_to_mode_reg (Pmode, op0);
25929 emit_insn (gen_lwp_llwpcb (op0));
25932 case IX86_BUILTIN_SLWPCB:
25933 icode = CODE_FOR_lwp_slwpcb;
25935 || !insn_data[icode].operand[0].predicate (target, Pmode))
25936 target = gen_reg_rtx (Pmode);
25937 emit_insn (gen_lwp_slwpcb (target));
25944 for (i = 0, d = bdesc_special_args;
25945 i < ARRAY_SIZE (bdesc_special_args);
25947 if (d->code == fcode)
25948 return ix86_expand_special_args_builtin (d, exp, target);
25950 for (i = 0, d = bdesc_args;
25951 i < ARRAY_SIZE (bdesc_args);
25953 if (d->code == fcode)
25956 case IX86_BUILTIN_FABSQ:
25957 case IX86_BUILTIN_COPYSIGNQ:
25959 /* Emit a normal call if SSE2 isn't available. */
25960 return expand_call (exp, target, ignore);
25962 return ix86_expand_args_builtin (d, exp, target);
25965 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25966 if (d->code == fcode)
25967 return ix86_expand_sse_comi (d, exp, target);
25969 for (i = 0, d = bdesc_pcmpestr;
25970 i < ARRAY_SIZE (bdesc_pcmpestr);
25972 if (d->code == fcode)
25973 return ix86_expand_sse_pcmpestr (d, exp, target);
25975 for (i = 0, d = bdesc_pcmpistr;
25976 i < ARRAY_SIZE (bdesc_pcmpistr);
25978 if (d->code == fcode)
25979 return ix86_expand_sse_pcmpistr (d, exp, target);
25981 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25982 if (d->code == fcode)
25983 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25984 (enum ix86_builtin_func_type)
25985 d->flag, d->comparison);
25987 gcc_unreachable ();
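/* Illustrative user-level sketch, not part of the original file: the ISA
   check at the top of ix86_expand_builtin is what lets a builtin be used
   inside a function carrying a more permissive target attribute even
   when the command line does not enable that ISA, and what emits the
   "needs isa option" error otherwise: */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

__attribute__ ((target ("sse4.1")))
static v4si
max32 (v4si a, v4si b)
{
  /* Accepted because the enclosing function enables SSE4.1; without
     the attribute (and without -msse4.1) this call would be rejected
     with "needs isa option -msse4.1".  */
  return __builtin_ia32_pmaxsd128 (a, b);
}
#endif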
25990 /* Returns a function decl for a vectorized version of the builtin function
25991 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25992 if it is not available. */
25995 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25998 enum machine_mode in_mode, out_mode;
26000 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26002 if (TREE_CODE (type_out) != VECTOR_TYPE
26003 || TREE_CODE (type_in) != VECTOR_TYPE
26004 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
26007 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26008 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26009 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26010 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26014 case BUILT_IN_SQRT:
26015 if (out_mode == DFmode && out_n == 2
26016 && in_mode == DFmode && in_n == 2)
26017 return ix86_builtins[IX86_BUILTIN_SQRTPD];
26020 case BUILT_IN_SQRTF:
26021 if (out_mode == SFmode && out_n == 4
26022 && in_mode == SFmode && in_n == 4)
26023 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
26026 case BUILT_IN_LRINT:
26027 if (out_mode == SImode && out_n == 4
26028 && in_mode == DFmode && in_n == 2)
26029 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
26032 case BUILT_IN_LRINTF:
26033 if (out_mode == SImode && out_n == 4
26034 && in_mode == SFmode && in_n == 4)
26035 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26038 case BUILT_IN_COPYSIGN:
26039 if (out_mode == DFmode && out_n == 2
26040 && in_mode == DFmode && in_n == 2)
26041 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26044 case BUILT_IN_COPYSIGNF:
26045 if (out_mode == SFmode && out_n == 4
26046 && in_mode == SFmode && in_n == 4)
26047 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26054 /* Dispatch to a handler for a vectorization library. */
26055 if (ix86_veclib_handler)
26056 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26057 type_in);
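/* Illustrative sketch, not part of the original file: a loop like the
   one below is what the vectorizer queries this hook for; with SSE2
   enabled the BUILT_IN_SQRT case above maps it onto
   IX86_BUILTIN_SQRTPD, i.e. a sqrtpd handling two doubles per
   iteration: */
#if 0
#include <math.h>

static void
vec_sqrt (double *a, const double *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = sqrt (b[i]);
}
#endif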
26062 /* Handler for an SVML-style interface to
26063 a library with vectorized intrinsics. */
26066 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26069 tree fntype, new_fndecl, args;
26072 enum machine_mode el_mode, in_mode;
26075 /* The SVML is suitable for unsafe math only. */
26076 if (!flag_unsafe_math_optimizations)
26079 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26080 n = TYPE_VECTOR_SUBPARTS (type_out);
26081 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26082 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26083 if (el_mode != in_mode
26091 case BUILT_IN_LOG10:
26093 case BUILT_IN_TANH:
26095 case BUILT_IN_ATAN:
26096 case BUILT_IN_ATAN2:
26097 case BUILT_IN_ATANH:
26098 case BUILT_IN_CBRT:
26099 case BUILT_IN_SINH:
26101 case BUILT_IN_ASINH:
26102 case BUILT_IN_ASIN:
26103 case BUILT_IN_COSH:
26105 case BUILT_IN_ACOSH:
26106 case BUILT_IN_ACOS:
26107 if (el_mode != DFmode || n != 2)
26111 case BUILT_IN_EXPF:
26112 case BUILT_IN_LOGF:
26113 case BUILT_IN_LOG10F:
26114 case BUILT_IN_POWF:
26115 case BUILT_IN_TANHF:
26116 case BUILT_IN_TANF:
26117 case BUILT_IN_ATANF:
26118 case BUILT_IN_ATAN2F:
26119 case BUILT_IN_ATANHF:
26120 case BUILT_IN_CBRTF:
26121 case BUILT_IN_SINHF:
26122 case BUILT_IN_SINF:
26123 case BUILT_IN_ASINHF:
26124 case BUILT_IN_ASINF:
26125 case BUILT_IN_COSHF:
26126 case BUILT_IN_COSF:
26127 case BUILT_IN_ACOSHF:
26128 case BUILT_IN_ACOSF:
26129 if (el_mode != SFmode || n != 4)
26137 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26139 if (fn == BUILT_IN_LOGF)
26140 strcpy (name, "vmlsLn4");
26141 else if (fn == BUILT_IN_LOG)
26142 strcpy (name, "vmldLn2");
26145 sprintf (name, "vmls%s", bname+10);
26146 name[strlen (name)-1] = '4';
26149 sprintf (name, "vmld%s2", bname+10);
26151 /* Convert to uppercase. */
26155 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26156 args = TREE_CHAIN (args))
26160 fntype = build_function_type_list (type_out, type_in, NULL);
26162 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26164 /* Build a function declaration for the vectorized function. */
26165 new_fndecl = build_decl (BUILTINS_LOCATION,
26166 FUNCTION_DECL, get_identifier (name), fntype);
26167 TREE_PUBLIC (new_fndecl) = 1;
26168 DECL_EXTERNAL (new_fndecl) = 1;
26169 DECL_IS_NOVOPS (new_fndecl) = 1;
26170 TREE_READONLY (new_fndecl) = 1;
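/* Illustrative sketch, not part of the original file: the mangling above
   turns "__builtin_sinf" into "vmlsSin4" (strip "__builtin_", prepend
   "vmls" for SFmode or "vmld" for DFmode, replace the trailing "f" with
   the vector width, then uppercase the first letter).  A hypothetical
   standalone re-creation of the SFmode case: */
#if 0
#include <stdio.h>
#include <string.h>

static void
svml_sf_name (char *out, const char *builtin)
{
  sprintf (out, "vmls%s", builtin + 10);	/* skip "__builtin_" */
  out[strlen (out) - 1] = '4';			/* "f" -> vector width */
  out[4] &= ~0x20;				/* "vmlssin4" -> "vmlsSin4" */
}
#endif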
26175 /* Handler for an ACML-style interface to
26176 a library with vectorized intrinsics. */
26179 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26181 char name[20] = "__vr.._";
26182 tree fntype, new_fndecl, args;
26185 enum machine_mode el_mode, in_mode;
26188 /* The ACML is 64-bit only and suitable for unsafe math only, as
26189 it does not correctly support parts of IEEE with the required
26190 precision such as denormals. */
26192 || !flag_unsafe_math_optimizations)
26195 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26196 n = TYPE_VECTOR_SUBPARTS (type_out);
26197 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26198 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26199 if (el_mode != in_mode
26209 case BUILT_IN_LOG2:
26210 case BUILT_IN_LOG10:
26213 if (el_mode != DFmode
26218 case BUILT_IN_SINF:
26219 case BUILT_IN_COSF:
26220 case BUILT_IN_EXPF:
26221 case BUILT_IN_POWF:
26222 case BUILT_IN_LOGF:
26223 case BUILT_IN_LOG2F:
26224 case BUILT_IN_LOG10F:
26227 if (el_mode != SFmode
26236 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26237 sprintf (name + 7, "%s", bname+10);
26240 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26241 args = TREE_CHAIN (args))
26245 fntype = build_function_type_list (type_out, type_in, NULL);
26247 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26249 /* Build a function declaration for the vectorized function. */
26250 new_fndecl = build_decl (BUILTINS_LOCATION,
26251 FUNCTION_DECL, get_identifier (name), fntype);
26252 TREE_PUBLIC (new_fndecl) = 1;
26253 DECL_EXTERNAL (new_fndecl) = 1;
26254 DECL_IS_NOVOPS (new_fndecl) = 1;
26255 TREE_READONLY (new_fndecl) = 1;
26261 /* Returns a decl of a function that implements conversion of an integer vector
26262 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26263 are the types involved when converting according to CODE.
26264 Return NULL_TREE if it is not available. */
26267 ix86_vectorize_builtin_conversion (unsigned int code,
26268 tree dest_type, tree src_type)
26276 switch (TYPE_MODE (src_type))
26279 switch (TYPE_MODE (dest_type))
26282 return (TYPE_UNSIGNED (src_type)
26283 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26284 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26286 return (TYPE_UNSIGNED (src_type)
26288 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26294 switch (TYPE_MODE (dest_type))
26297 return (TYPE_UNSIGNED (src_type)
26299 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26308 case FIX_TRUNC_EXPR:
26309 switch (TYPE_MODE (dest_type))
26312 switch (TYPE_MODE (src_type))
26315 return (TYPE_UNSIGNED (dest_type)
26317 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26319 return (TYPE_UNSIGNED (dest_type)
26321 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26328 switch (TYPE_MODE (src_type))
26331 return (TYPE_UNSIGNED (dest_type)
26333 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26350 /* Returns a decl for a target-specific builtin that implements the
26351 reciprocal of the function, or NULL_TREE if not available. */
26354 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26355 bool sqrt ATTRIBUTE_UNUSED)
26357 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26358 && flag_finite_math_only && !flag_trapping_math
26359 && flag_unsafe_math_optimizations))
26363 /* Machine dependent builtins. */
26366 /* Vectorized version of sqrt to rsqrt conversion. */
26367 case IX86_BUILTIN_SQRTPS_NR:
26368 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26374 /* Normal builtins. */
26377 /* Sqrt to rsqrt conversion. */
26378 case BUILT_IN_SQRTF:
26379 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26386 /* Helper for avx_vpermilps256_operand et al. This is also used by
26387 the expansion functions to turn the parallel back into a mask.
26388 The return value is 0 for no match and the imm8+1 for a match. */
26391 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
26393 unsigned i, nelt = GET_MODE_NUNITS (mode);
26395 unsigned char ipar[8];
26397 if (XVECLEN (par, 0) != (int) nelt)
26400 /* Validate that all of the elements are constants, and not totally
26401 out of range. Copy the data into an integral array to make the
26402 subsequent checks easier. */
26403 for (i = 0; i < nelt; ++i)
26405 rtx er = XVECEXP (par, 0, i);
26406 unsigned HOST_WIDE_INT ei;
26408 if (!CONST_INT_P (er))
26419 /* In the 256-bit DFmode case, we can only move elements within
26420 a 128-bit lane. */
26421 for (i = 0; i < 2; ++i)
26425 mask |= ipar[i] << i;
26427 for (i = 2; i < 4; ++i)
26431 mask |= (ipar[i] - 2) << i;
26436 /* In the 256-bit SFmode case, we have full freedom of movement
26437 within the low 128-bit lane, but the high 128-bit lane must
26438 mirror the exact same pattern. */
26439 for (i = 0; i < 4; ++i)
26440 if (ipar[i] + 4 != ipar[i + 4])
26447 /* In the 128-bit case, we have full freedom in the placement of
26448 the elements from the source operand. */
26449 for (i = 0; i < nelt; ++i)
26450 mask |= ipar[i] << (i * (nelt / 2));
26454 gcc_unreachable ();
26457 /* Make sure success has a non-zero value by adding one. */
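/* Illustrative sketch, not part of the original file: in the 128-bit
   case the loop above packs one selector per element into the
   immediate, using 2-bit fields for V4SF (nelt/2 == 2) and 1-bit fields
   for V2DF.  A hypothetical standalone equivalent for V4SF: */
#if 0
static unsigned
v4sf_vpermil_imm (const unsigned char sel[4])
{
  unsigned mask = 0, i;

  for (i = 0; i < 4; ++i)
    mask |= (sel[i] & 3u) << (i * 2);
  return mask + 1;	/* e.g. {2,1,0,3} encodes as 0xc6, returned as 0xc7 */
}
#endif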
26461 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26462 the expansion functions to turn the parallel back into a mask.
26463 The return value is 0 for no match and the imm8+1 for a match. */
26466 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26468 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26470 unsigned char ipar[8];
26472 if (XVECLEN (par, 0) != (int) nelt)
26475 /* Validate that all of the elements are constants, and not totally
26476 out of range. Copy the data into an integral array to make the
26477 subsequent checks easier. */
26478 for (i = 0; i < nelt; ++i)
26480 rtx er = XVECEXP (par, 0, i);
26481 unsigned HOST_WIDE_INT ei;
26483 if (!CONST_INT_P (er))
26486 if (ei >= 2 * nelt)
26491 /* Validate that each half of the permute is a run of consecutive elements. */
26492 for (i = 0; i < nelt2 - 1; ++i)
26493 if (ipar[i] + 1 != ipar[i + 1])
26495 for (i = nelt2; i < nelt - 1; ++i)
26496 if (ipar[i] + 1 != ipar[i + 1])
26499 /* Reconstruct the mask. */
26500 for (i = 0; i < 2; ++i)
26502 unsigned e = ipar[i * nelt2];
26506 mask |= e << (i * 4);
26509 /* Make sure success has a non-zero value by adding one. */
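/* Illustrative sketch, not part of the original file: once both halves
   are known to be contiguous, only the index of the selected 128-bit
   half survives: bits 0-1 of the immediate pick the source half for the
   low result half, bits 4-5 for the high one.  Taking the high halves
   of both operands of a V4DF permute, i.e. elements {2,3,6,7}, encodes
   as imm8 0x31: */
#if 0
static unsigned
vperm2f128_imm (unsigned lo_src_half, unsigned hi_src_half)
{
  /* Each selector is 0-3: low/high half of the first or second source.
     Like the helper above, the encoded imm8 is returned plus one.  */
  return ((lo_src_half & 3u) | ((hi_src_half & 3u) << 4)) + 1;
}
#endif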
26514 /* Store OPERAND to memory after reload is completed. This means
26515 that we can't easily use assign_stack_local. */
26517 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26521 gcc_assert (reload_completed);
26522 if (ix86_using_red_zone ())
26524 result = gen_rtx_MEM (mode,
26525 gen_rtx_PLUS (Pmode,
26527 GEN_INT (-RED_ZONE_SIZE)));
26528 emit_move_insn (result, operand);
26530 else if (TARGET_64BIT)
26536 operand = gen_lowpart (DImode, operand);
26540 gen_rtx_SET (VOIDmode,
26541 gen_rtx_MEM (DImode,
26542 gen_rtx_PRE_DEC (DImode,
26543 stack_pointer_rtx)),
26547 gcc_unreachable ();
26549 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26558 split_double_mode (mode, &operand, 1, operands, operands + 1);
26560 gen_rtx_SET (VOIDmode,
26561 gen_rtx_MEM (SImode,
26562 gen_rtx_PRE_DEC (Pmode,
26563 stack_pointer_rtx)),
26566 gen_rtx_SET (VOIDmode,
26567 gen_rtx_MEM (SImode,
26568 gen_rtx_PRE_DEC (Pmode,
26569 stack_pointer_rtx)),
26574 /* Store HImodes as SImodes. */
26575 operand = gen_lowpart (SImode, operand);
26579 gen_rtx_SET (VOIDmode,
26580 gen_rtx_MEM (GET_MODE (operand),
26581 gen_rtx_PRE_DEC (SImode,
26582 stack_pointer_rtx)),
26586 gcc_unreachable ();
26588 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26593 /* Free the operand from memory. */
26595 ix86_free_from_memory (enum machine_mode mode)
26597 if (!ix86_using_red_zone ())
26601 if (mode == DImode || TARGET_64BIT)
26605 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26606 to a pop or add instruction if registers are available. */
26607 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26608 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26613 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26614 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26616 static const reg_class_t *
26617 i386_ira_cover_classes (void)
26619 static const reg_class_t sse_fpmath_classes[] = {
26620 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26622 static const reg_class_t no_sse_fpmath_classes[] = {
26623 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26626 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26629 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26630 QImode must go into class Q_REGS.
26631 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26632 movdf to do mem-to-mem moves through integer regs. */
26634 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26636 enum machine_mode mode = GET_MODE (x);
26638 /* We're only allowed to return a subclass of CLASS. Many of the
26639 following checks fail for NO_REGS, so eliminate that early. */
26640 if (regclass == NO_REGS)
26643 /* All classes can load zeros. */
26644 if (x == CONST0_RTX (mode))
26647 /* Force constants into memory if we are loading a (nonzero) constant into
26648 an MMX or SSE register. This is because there are no MMX/SSE instructions
26649 to load from a constant. */
26651 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26654 /* Prefer SSE regs only, if we can use them for math. */
26655 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26656 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26658 /* Floating-point constants need more complex checks. */
26659 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26661 /* General regs can load everything. */
26662 if (reg_class_subset_p (regclass, GENERAL_REGS))
26665 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26666 zero above. We only want to wind up preferring 80387 registers if
26667 we plan on doing computation with them. */
26669 && standard_80387_constant_p (x))
26671 /* Limit class to non-sse. */
26672 if (regclass == FLOAT_SSE_REGS)
26674 if (regclass == FP_TOP_SSE_REGS)
26676 if (regclass == FP_SECOND_SSE_REGS)
26677 return FP_SECOND_REG;
26678 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26685 /* Generally when we see PLUS here, it's the function invariant
26686 (plus soft-fp const_int), which can only be computed into general
26687 regs. */
26688 if (GET_CODE (x) == PLUS)
26689 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26691 /* QImode constants are easy to load, but non-constant QImode data
26692 must go into Q_REGS. */
26693 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26695 if (reg_class_subset_p (regclass, Q_REGS))
26697 if (reg_class_subset_p (Q_REGS, regclass))
26705 /* Discourage putting floating-point values in SSE registers unless
26706 SSE math is being used, and likewise for the 387 registers. */
26708 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26710 enum machine_mode mode = GET_MODE (x);
26712 /* Restrict the output reload class to the register bank that we are doing
26713 math on. If we would prefer not to return a subset of CLASS, reject this
26714 alternative: if reload cannot do this, it will still use its choice. */
26716 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26717 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26719 if (X87_FLOAT_MODE_P (mode))
26721 if (regclass == FP_TOP_SSE_REGS)
26723 else if (regclass == FP_SECOND_SSE_REGS)
26724 return FP_SECOND_REG;
26726 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26733 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26734 enum machine_mode mode,
26735 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26737 /* QImode spills from non-QI registers require an
26738 intermediate register on 32-bit targets. */
26739 if (!in_p && mode == QImode && !TARGET_64BIT
26740 && (rclass == GENERAL_REGS
26741 || rclass == LEGACY_REGS
26742 || rclass == INDEX_REGS))
26751 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26752 regno = true_regnum (x);
26754 /* Return Q_REGS if the operand is in memory. */
26762 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26765 ix86_class_likely_spilled_p (reg_class_t rclass)
26776 case SSE_FIRST_REG:
26778 case FP_SECOND_REG:
26788 /* If we are copying between general and FP registers, we need a memory
26789 location. The same is true for SSE and MMX registers.
26791 To optimize register_move_cost performance, allow inline variant.
26793 The macro can't work reliably when one of the CLASSES is a class containing
26794 registers from multiple units (SSE, MMX, integer). We avoid this by never
26795 combining those units in a single alternative in the machine description.
26796 Ensure that this constraint holds to avoid unexpected surprises.
26798 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26799 enforce these sanity checks. */
26802 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26803 enum machine_mode mode, int strict)
26805 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26806 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26807 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26808 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26809 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26810 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26812 gcc_assert (!strict);
26816 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26819 /* ??? This is a lie. We do have moves between mmx/general, and for
26820 mmx/sse2. But by saying we need secondary memory we discourage the
26821 register allocator from using the mmx registers unless needed. */
26822 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26825 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26827 /* SSE1 doesn't have any direct moves from other classes. */
26831 /* If the target says that inter-unit moves are more expensive
26832 than moving through memory, then don't generate them. */
26833 if (!TARGET_INTER_UNIT_MOVES)
26836 /* Between SSE and general, we have moves no larger than word size. */
26837 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26845 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26846 enum machine_mode mode, int strict)
26848 return inline_secondary_memory_needed (class1, class2, mode, strict);
26851 /* Return true if the registers in CLASS cannot represent the change from
26852 modes FROM to TO. */
26855 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26856 enum reg_class regclass)
26861 /* x87 registers can't do subreg at all, as all values are reformatted
26862 to extended precision. */
26863 if (MAYBE_FLOAT_CLASS_P (regclass))
26866 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26868 /* Vector registers do not support QI or HImode loads. If we don't
26869 disallow a change to these modes, reload will assume it's ok to
26870 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26871 the vec_dupv4hi pattern. */
26872 if (GET_MODE_SIZE (from) < 4)
26875 /* Vector registers do not support subreg with nonzero offsets, which
26876 are otherwise valid for integer registers. Since we can't see
26877 whether we have a nonzero offset from here, prohibit all
26878 nonparadoxical subregs changing size. */
26879 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26886 /* Return the cost of moving data of mode M between a
26887 register and memory. A value of 2 is the default; this cost is
26888 relative to those in `REGISTER_MOVE_COST'.
26890 This function is used extensively by register_move_cost, which is used to
26891 build tables at startup. Make it inline in this case.
26892 When IN is 2, return the maximum of the in and out move costs.
26894 If moving between registers and memory is more expensive than
26895 between two registers, you should define this macro to express the
26896 relative cost.
26898 Also model the increased moving costs of QImode registers in non
26899 Q_REGS classes. */
26902 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26906 if (FLOAT_CLASS_P (regclass))
26924 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26925 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26927 if (SSE_CLASS_P (regclass))
26930 switch (GET_MODE_SIZE (mode))
26945 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26946 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26948 if (MMX_CLASS_P (regclass))
26951 switch (GET_MODE_SIZE (mode))
26963 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26964 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26966 switch (GET_MODE_SIZE (mode))
26969 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26972 return ix86_cost->int_store[0];
26973 if (TARGET_PARTIAL_REG_DEPENDENCY
26974 && optimize_function_for_speed_p (cfun))
26975 cost = ix86_cost->movzbl_load;
26977 cost = ix86_cost->int_load[0];
26979 return MAX (cost, ix86_cost->int_store[0]);
26985 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26987 return ix86_cost->movzbl_load;
26989 return ix86_cost->int_store[0] + 4;
26994 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26995 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26997 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
26998 if (mode == TFmode)
27001 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
27003 cost = ix86_cost->int_load[2];
27005 cost = ix86_cost->int_store[2];
27006 return (cost * (((int) GET_MODE_SIZE (mode)
27007 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
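/* Illustrative sketch, not part of the original file: the final scaling
   above is a ceiling division over word-sized pieces, so on a 32-bit
   target (UNITS_PER_WORD == 4) a 12-byte XFmode value costs three word
   moves: */
#if 0
static int
n_word_moves (int mode_size, int units_per_word)
{
  return (mode_size + units_per_word - 1) / units_per_word; /* 12/4 -> 3 */
}
#endif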
27012 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
27015 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
27019 /* Return the cost of moving data from a register in class CLASS1 to
27020 one in class CLASS2.
27022 It is not required that the cost always equal 2 when FROM is the same as TO;
27023 on some machines it is expensive to move between registers if they are not
27024 general registers. */
27027 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
27028 reg_class_t class2_i)
27030 enum reg_class class1 = (enum reg_class) class1_i;
27031 enum reg_class class2 = (enum reg_class) class2_i;
27033 /* In case we require secondary memory, compute the cost of the store
27034 followed by the load. In order to avoid bad register allocation choices,
27035 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
27037 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27041 cost += inline_memory_move_cost (mode, class1, 2);
27042 cost += inline_memory_move_cost (mode, class2, 2);
27044 /* In the case of copying from a general purpose register we may emit
27045 multiple stores followed by a single load, causing a memory size
27046 mismatch stall. Count this as an arbitrarily high cost of 20. */
27047 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27050 /* In the case of FP/MMX moves, the registers actually overlap, and we
27051 have to switch modes in order to treat them differently. */
27052 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27053 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27059 /* Moves between SSE/MMX and integer unit are expensive. */
27060 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27061 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27063 /* ??? By keeping the returned value relatively high, we limit the number
27064 of moves between integer and MMX/SSE registers for all targets.
27065 Additionally, a high value prevents problems with x86_modes_tieable_p(),
27066 where integer modes in MMX/SSE registers are not tieable
27067 because of missing QImode and HImode moves to, from or between
27068 MMX/SSE registers. */
27069 return MAX (8, ix86_cost->mmxsse_to_integer);
27071 if (MAYBE_FLOAT_CLASS_P (class1))
27072 return ix86_cost->fp_move;
27073 if (MAYBE_SSE_CLASS_P (class1))
27074 return ix86_cost->sse_move;
27075 if (MAYBE_MMX_CLASS_P (class1))
27076 return ix86_cost->mmx_move;
27080 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27083 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27085 /* Flags, and only flags, can hold CCmode values. */
27086 if (CC_REGNO_P (regno))
27087 return GET_MODE_CLASS (mode) == MODE_CC;
27088 if (GET_MODE_CLASS (mode) == MODE_CC
27089 || GET_MODE_CLASS (mode) == MODE_RANDOM
27090 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27092 if (FP_REGNO_P (regno))
27093 return VALID_FP_MODE_P (mode);
27094 if (SSE_REGNO_P (regno))
27096 /* We implement the move patterns for all vector modes into and
27097 out of SSE registers, even when no operation instructions
27098 are available. OImode move is available only when AVX is
27100 return ((TARGET_AVX && mode == OImode)
27101 || VALID_AVX256_REG_MODE (mode)
27102 || VALID_SSE_REG_MODE (mode)
27103 || VALID_SSE2_REG_MODE (mode)
27104 || VALID_MMX_REG_MODE (mode)
27105 || VALID_MMX_REG_MODE_3DNOW (mode));
27107 if (MMX_REGNO_P (regno))
27109 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27110 so if the register is available at all, then we can move data of
27111 the given mode into or out of it. */
27112 return (VALID_MMX_REG_MODE (mode)
27113 || VALID_MMX_REG_MODE_3DNOW (mode));
27116 if (mode == QImode)
27118 /* Take care with QImode values - they can be in non-QI regs,
27119 but then they do cause partial register stalls. */
27120 if (regno <= BX_REG || TARGET_64BIT)
27122 if (!TARGET_PARTIAL_REG_STALL)
27124 return reload_in_progress || reload_completed;
27126 /* We handle both integer and floats in the general purpose registers. */
27127 else if (VALID_INT_MODE_P (mode))
27129 else if (VALID_FP_MODE_P (mode))
27131 else if (VALID_DFP_MODE_P (mode))
27133 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27134 on to use that value in smaller contexts, this can easily force a
27135 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27136 supporting DImode, allow it. */
27137 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27143 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27144 tieable integer mode. */
27147 ix86_tieable_integer_mode_p (enum machine_mode mode)
27156 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27159 return TARGET_64BIT;
27166 /* Return true if MODE1 is accessible in a register that can hold MODE2
27167 without copying. That is, all register classes that can hold MODE2
27168 can also hold MODE1. */
27171 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27173 if (mode1 == mode2)
27176 if (ix86_tieable_integer_mode_p (mode1)
27177 && ix86_tieable_integer_mode_p (mode2))
27180 /* MODE2 being XFmode implies fp stack or general regs, which means we
27181 can tie any smaller floating point modes to it. Note that we do not
27182 tie this with TFmode. */
27183 if (mode2 == XFmode)
27184 return mode1 == SFmode || mode1 == DFmode;
27186 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27187 that we can tie it with SFmode. */
27188 if (mode2 == DFmode)
27189 return mode1 == SFmode;
27191 /* If MODE2 is only appropriate for an SSE register, then tie with
27192 any other mode acceptable to SSE registers. */
27193 if (GET_MODE_SIZE (mode2) == 16
27194 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27195 return (GET_MODE_SIZE (mode1) == 16
27196 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27198 /* If MODE2 is appropriate for an MMX register, then tie
27199 with any other mode acceptable to MMX registers. */
27200 if (GET_MODE_SIZE (mode2) == 8
27201 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27202 return (GET_MODE_SIZE (mode1) == 8
27203 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
27208 /* Compute a (partial) cost for rtx X. Return true if the complete
27209 cost has been computed, and false if subexpressions should be
27210 scanned. In either case, *TOTAL contains the cost result. */
27213 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27215 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27216 enum machine_mode mode = GET_MODE (x);
27217 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27225 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27227 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
27229 else if (flag_pic && SYMBOLIC_CONST (x)
27231 || (GET_CODE (x) != LABEL_REF
27232 && (GET_CODE (x) != SYMBOL_REF
27233 || !SYMBOL_REF_LOCAL_P (x)))))
27240 if (mode == VOIDmode)
27243 switch (standard_80387_constant_p (x))
27248 default: /* Other constants */
27253 /* Start with (MEM (SYMBOL_REF)), since that's where
27254 it'll probably end up. Add a penalty for size. */
27255 *total = (COSTS_N_INSNS (1)
27256 + (flag_pic != 0 && !TARGET_64BIT)
27257 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
27263 /* The zero extension is often completely free on x86_64, so make
27264 it as cheap as possible. */
27265 if (TARGET_64BIT && mode == DImode
27266 && GET_MODE (XEXP (x, 0)) == SImode)
27268 else if (TARGET_ZERO_EXTEND_WITH_AND)
27269 *total = cost->add;
27271 *total = cost->movzx;
27275 *total = cost->movsx;
27279 if (CONST_INT_P (XEXP (x, 1))
27280 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27282 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27285 *total = cost->add;
27288 if ((value == 2 || value == 3)
27289 && cost->lea <= cost->shift_const)
27291 *total = cost->lea;
27301 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27303 if (CONST_INT_P (XEXP (x, 1)))
27305 if (INTVAL (XEXP (x, 1)) > 32)
27306 *total = cost->shift_const + COSTS_N_INSNS (2);
27308 *total = cost->shift_const * 2;
27312 if (GET_CODE (XEXP (x, 1)) == AND)
27313 *total = cost->shift_var * 2;
27315 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27320 if (CONST_INT_P (XEXP (x, 1)))
27321 *total = cost->shift_const;
27323 *total = cost->shift_var;
27328 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27330 /* ??? SSE scalar cost should be used here. */
27331 *total = cost->fmul;
27334 else if (X87_FLOAT_MODE_P (mode))
27336 *total = cost->fmul;
27339 else if (FLOAT_MODE_P (mode))
27341 /* ??? SSE vector cost should be used here. */
27342 *total = cost->fmul;
27347 rtx op0 = XEXP (x, 0);
27348 rtx op1 = XEXP (x, 1);
27350 if (CONST_INT_P (XEXP (x, 1)))
27352 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27353 for (nbits = 0; value != 0; value &= value - 1)
27357 /* This is arbitrary. */
27360 /* Compute costs correctly for widening multiplication. */
27361 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27362 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27363 == GET_MODE_SIZE (mode))
27365 int is_mulwiden = 0;
27366 enum machine_mode inner_mode = GET_MODE (op0);
27368 if (GET_CODE (op0) == GET_CODE (op1))
27369 is_mulwiden = 1, op1 = XEXP (op1, 0);
27370 else if (CONST_INT_P (op1))
27372 if (GET_CODE (op0) == SIGN_EXTEND)
27373 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27376 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27380 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27383 *total = (cost->mult_init[MODE_INDEX (mode)]
27384 + nbits * cost->mult_bit
27385 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27394 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27395 /* ??? SSE cost should be used here. */
27396 *total = cost->fdiv;
27397 else if (X87_FLOAT_MODE_P (mode))
27398 *total = cost->fdiv;
27399 else if (FLOAT_MODE_P (mode))
27400 /* ??? SSE vector cost should be used here. */
27401 *total = cost->fdiv;
27403 *total = cost->divide[MODE_INDEX (mode)];
27407 if (GET_MODE_CLASS (mode) == MODE_INT
27408 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27410 if (GET_CODE (XEXP (x, 0)) == PLUS
27411 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27412 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27413 && CONSTANT_P (XEXP (x, 1)))
27415 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27416 if (val == 2 || val == 4 || val == 8)
27418 *total = cost->lea;
27419 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27420 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27421 outer_code, speed);
27422 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27426 else if (GET_CODE (XEXP (x, 0)) == MULT
27427 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27429 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27430 if (val == 2 || val == 4 || val == 8)
27432 *total = cost->lea;
27433 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27434 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27438 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27440 *total = cost->lea;
27441 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27442 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27443 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27450 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27452 /* ??? SSE cost should be used here. */
27453 *total = cost->fadd;
27456 else if (X87_FLOAT_MODE_P (mode))
27458 *total = cost->fadd;
27461 else if (FLOAT_MODE_P (mode))
27463 /* ??? SSE vector cost should be used here. */
27464 *total = cost->fadd;
27472 if (!TARGET_64BIT && mode == DImode)
27474 *total = (cost->add * 2
27475 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27476 << (GET_MODE (XEXP (x, 0)) != DImode))
27477 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27478 << (GET_MODE (XEXP (x, 1)) != DImode)));
27484 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27486 /* ??? SSE cost should be used here. */
27487 *total = cost->fchs;
27490 else if (X87_FLOAT_MODE_P (mode))
27492 *total = cost->fchs;
27495 else if (FLOAT_MODE_P (mode))
27497 /* ??? SSE vector cost should be used here. */
27498 *total = cost->fchs;
27504 if (!TARGET_64BIT && mode == DImode)
27505 *total = cost->add * 2;
27507 *total = cost->add;
27511 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27512 && XEXP (XEXP (x, 0), 1) == const1_rtx
27513 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27514 && XEXP (x, 1) == const0_rtx)
27516 /* This kind of construct is implemented using test[bwl].
27517 Treat it as if we had an AND. */
27518 *total = (cost->add
27519 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27520 + rtx_cost (const1_rtx, outer_code, speed));
27526 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27531 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27532 /* ??? SSE cost should be used here. */
27533 *total = cost->fabs;
27534 else if (X87_FLOAT_MODE_P (mode))
27535 *total = cost->fabs;
27536 else if (FLOAT_MODE_P (mode))
27537 /* ??? SSE vector cost should be used here. */
27538 *total = cost->fabs;
27542 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27543 /* ??? SSE cost should be used here. */
27544 *total = cost->fsqrt;
27545 else if (X87_FLOAT_MODE_P (mode))
27546 *total = cost->fsqrt;
27547 else if (FLOAT_MODE_P (mode))
27548 /* ??? SSE vector cost should be used here. */
27549 *total = cost->fsqrt;
27553 if (XINT (x, 1) == UNSPEC_TP)
27560 case VEC_DUPLICATE:
27561 /* ??? Assume all of these vector manipulation patterns are
27562 recognizable, in which case they all pretty much have the same cost.  */
27564 *total = COSTS_N_INSNS (1);
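/* Illustrative sketch, not part of the port: the MULT cost computed
   earlier in this function is mult_init[MODE_INDEX (mode)]
   + nbits * cost->mult_bit, where nbits counts the set bits of a
   constant multiplier by clearing the lowest set bit on each iteration
   (value &= value - 1).  A stand-alone version of that counting loop,
   assuming a 64-bit value:  */

static int
count_multiplier_bits_sketch (unsigned long long value)
{
  int nbits = 0;

  for (; value != 0; value &= value - 1)
    nbits++;			/* each iteration clears one set bit */
  return nbits;
}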
27574 static int current_machopic_label_num;
27576 /* Given a symbol name and its associated stub, write out the
27577 definition of the stub. */
27580 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27582 unsigned int length;
27583 char *binder_name, *symbol_name, lazy_ptr_name[32];
27584 int label = ++current_machopic_label_num;
27586 /* For 64-bit we shouldn't get here. */
27587 gcc_assert (!TARGET_64BIT);
27589 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27590 symb = targetm.strip_name_encoding (symb);
27592 length = strlen (stub);
27593 binder_name = XALLOCAVEC (char, length + 32);
27594 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27596 length = strlen (symb);
27597 symbol_name = XALLOCAVEC (char, length + 32);
27598 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27600 sprintf (lazy_ptr_name, "L%d$lz", label);
27603 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27605 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27607 fprintf (file, "%s:\n", stub);
27608 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27612 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27613 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27614 fprintf (file, "\tjmp\t*%%edx\n");
27617 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27619 fprintf (file, "%s:\n", binder_name);
27623 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27624 fputs ("\tpushl\t%eax\n", file);
27627 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27629 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27631 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27632 fprintf (file, "%s:\n", lazy_ptr_name);
27633 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27634 fprintf (file, ASM_LONG "%s\n", binder_name);
27636 #endif /* TARGET_MACHO */
27638 /* Order the registers for register allocator. */
27641 x86_order_regs_for_local_alloc (void)
27646 /* First allocate the local general purpose registers. */
27647 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27648 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27649 reg_alloc_order [pos++] = i;
27651 /* Global general purpose registers. */
27652 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27653 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27654 reg_alloc_order [pos++] = i;
27656 /* x87 registers come first in case we are doing FP math using them.  */
27658 if (!TARGET_SSE_MATH)
27659 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27660 reg_alloc_order [pos++] = i;
27662 /* SSE registers. */
27663 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27664 reg_alloc_order [pos++] = i;
27665 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27666 reg_alloc_order [pos++] = i;
27668 /* x87 registers. */
27669 if (TARGET_SSE_MATH)
27670 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27671 reg_alloc_order [pos++] = i;
27673 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27674 reg_alloc_order [pos++] = i;
27676 /* Initialize the rest of the array, as we do not allocate some registers at all.  */
27678 while (pos < FIRST_PSEUDO_REGISTER)
27679 reg_alloc_order [pos++] = 0;
27682 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27683 struct attribute_spec.handler. */
27685 ix86_handle_abi_attribute (tree *node, tree name,
27686 tree args ATTRIBUTE_UNUSED,
27687 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27689 if (TREE_CODE (*node) != FUNCTION_TYPE
27690 && TREE_CODE (*node) != METHOD_TYPE
27691 && TREE_CODE (*node) != FIELD_DECL
27692 && TREE_CODE (*node) != TYPE_DECL)
27694 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27696 *no_add_attrs = true;
27701 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27703 *no_add_attrs = true;
27707 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27708 if (is_attribute_p ("ms_abi", name))
27710 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27712 error ("ms_abi and sysv_abi attributes are not compatible");
27717 else if (is_attribute_p ("sysv_abi", name))
27719 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27721 error ("ms_abi and sysv_abi attributes are not compatible");
27730 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27731 struct attribute_spec.handler. */
27733 ix86_handle_struct_attribute (tree *node, tree name,
27734 tree args ATTRIBUTE_UNUSED,
27735 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27738 if (DECL_P (*node))
27740 if (TREE_CODE (*node) == TYPE_DECL)
27741 type = &TREE_TYPE (*node);
27746 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27747 || TREE_CODE (*type) == UNION_TYPE)))
27749 warning (OPT_Wattributes, "%qE attribute ignored",
27751 *no_add_attrs = true;
27754 else if ((is_attribute_p ("ms_struct", name)
27755 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27756 || ((is_attribute_p ("gcc_struct", name)
27757 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27759 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27761 *no_add_attrs = true;
27768 ix86_handle_fndecl_attribute (tree *node, tree name,
27769 tree args ATTRIBUTE_UNUSED,
27770 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27772 if (TREE_CODE (*node) != FUNCTION_DECL)
27774 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27776 *no_add_attrs = true;
27782 ix86_ms_bitfield_layout_p (const_tree record_type)
27784 return ((TARGET_MS_BITFIELD_LAYOUT
27785 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27786 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27789 /* Returns an expression indicating where the this parameter is
27790 located on entry to the FUNCTION. */
27793 x86_this_parameter (tree function)
27795 tree type = TREE_TYPE (function);
27796 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27801 const int *parm_regs;
27803 if (ix86_function_type_abi (type) == MS_ABI)
27804 parm_regs = x86_64_ms_abi_int_parameter_registers;
27806 parm_regs = x86_64_int_parameter_registers;
27807 return gen_rtx_REG (DImode, parm_regs[aggr]);
27810 nregs = ix86_function_regparm (type, function);
27812 if (nregs > 0 && !stdarg_p (type))
27816 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27817 regno = aggr ? DX_REG : CX_REG;
27818 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27822 return gen_rtx_MEM (SImode,
27823 plus_constant (stack_pointer_rtx, 4));
27832 return gen_rtx_MEM (SImode,
27833 plus_constant (stack_pointer_rtx, 4));
27836 return gen_rtx_REG (SImode, regno);
27839 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27842 /* Determine whether x86_output_mi_thunk can succeed. */
27845 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27846 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27847 HOST_WIDE_INT vcall_offset, const_tree function)
27849 /* 64-bit can handle anything. */
27853 /* For 32-bit, everything's fine if we have one free register. */
27854 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27857 /* Need a free register for vcall_offset. */
27861 /* Need a free register for GOT references. */
27862 if (flag_pic && !targetm.binds_local_p (function))
27865 /* Otherwise ok. */
27869 /* Output the assembler code for a thunk function. THUNK_DECL is the
27870 declaration for the thunk function itself, FUNCTION is the decl for
27871 the target function. DELTA is an immediate constant offset to be
27872 added to THIS. If VCALL_OFFSET is nonzero, the word at
27873 *(*this + vcall_offset) should be added to THIS. */
27876 x86_output_mi_thunk (FILE *file,
27877 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27878 HOST_WIDE_INT vcall_offset, tree function)
27881 rtx this_param = x86_this_parameter (function);
27884 /* Make sure unwind info is emitted for the thunk if needed. */
27885 final_start_function (emit_barrier (), file, 1);
27887 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27888 pull it in now and let DELTA benefit. */
27889 if (REG_P (this_param))
27890 this_reg = this_param;
27891 else if (vcall_offset)
27893 /* Put the this parameter into %eax. */
27894 xops[0] = this_param;
27895 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27896 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27899 this_reg = NULL_RTX;
27901 /* Adjust the this parameter by a fixed constant. */
27904 xops[0] = GEN_INT (delta);
27905 xops[1] = this_reg ? this_reg : this_param;
27908 if (!x86_64_general_operand (xops[0], DImode))
27910 tmp = gen_rtx_REG (DImode, R10_REG);
27912 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27914 xops[1] = this_param;
27916 if (x86_maybe_negate_const_int (&xops[0], DImode))
27917 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27919 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27921 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27922 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27924 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27927 /* Adjust the this parameter by a value stored in the vtable. */
27931 tmp = gen_rtx_REG (DImode, R10_REG);
27934 int tmp_regno = CX_REG;
27935 if (lookup_attribute ("fastcall",
27936 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27937 || lookup_attribute ("thiscall",
27938 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27939 tmp_regno = AX_REG;
27940 tmp = gen_rtx_REG (SImode, tmp_regno);
27943 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27945 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27947 /* Adjust the this parameter. */
27948 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27949 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27951 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27952 xops[0] = GEN_INT (vcall_offset);
27954 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27955 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27957 xops[1] = this_reg;
27958 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27961 /* If necessary, drop THIS back to its stack slot. */
27962 if (this_reg && this_reg != this_param)
27964 xops[0] = this_reg;
27965 xops[1] = this_param;
27966 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27969 xops[0] = XEXP (DECL_RTL (function), 0);
27972 if (!flag_pic || targetm.binds_local_p (function))
27973 output_asm_insn ("jmp\t%P0", xops);
27974 /* All thunks should be in the same object as their target,
27975 and thus binds_local_p should be true. */
27976 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27977 gcc_unreachable ();
27980 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27981 tmp = gen_rtx_CONST (Pmode, tmp);
27982 tmp = gen_rtx_MEM (QImode, tmp);
27984 output_asm_insn ("jmp\t%A0", xops);
27989 if (!flag_pic || targetm.binds_local_p (function))
27990 output_asm_insn ("jmp\t%P0", xops);
27995 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27996 if (TARGET_MACHO_BRANCH_ISLANDS)
27997 sym_ref = (gen_rtx_SYMBOL_REF
27999 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
28000 tmp = gen_rtx_MEM (QImode, sym_ref);
28002 output_asm_insn ("jmp\t%0", xops);
28005 #endif /* TARGET_MACHO */
28007 tmp = gen_rtx_REG (SImode, CX_REG);
28008 output_set_got (tmp, NULL_RTX);
28011 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
28012 output_asm_insn ("jmp\t{*}%1", xops);
28015 final_end_function ();
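/* Illustrative sketch, plain C rather than the emitted assembly: the
   adjustment the thunk above applies to its incoming `this' pointer
   before tail-calling FUNCTION.  DELTA and VCALL_OFFSET correspond to
   the parameters documented above; the helper name is made up.  */

static void *
thunk_this_adjust_sketch (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;	/* constant DELTA adjustment */

  if (vcall_offset)
    {
      /* Add the word at *(*this + vcall_offset) to THIS.  */
      char *vtable = *(char **) p;
      p += *(long *) (vtable + vcall_offset);
    }
  return p;
}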
28019 x86_file_start (void)
28021 default_file_start ();
28023 darwin_file_start ();
28025 if (X86_FILE_START_VERSION_DIRECTIVE)
28026 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28027 if (X86_FILE_START_FLTUSED)
28028 fputs ("\t.global\t__fltused\n", asm_out_file);
28029 if (ix86_asm_dialect == ASM_INTEL)
28030 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28034 x86_field_alignment (tree field, int computed)
28036 enum machine_mode mode;
28037 tree type = TREE_TYPE (field);
28039 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28041 mode = TYPE_MODE (strip_array_types (type));
28042 if (mode == DFmode || mode == DCmode
28043 || GET_MODE_CLASS (mode) == MODE_INT
28044 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28045 return MIN (32, computed);
28049 /* Output assembler code to FILE to increment profiler label # LABELNO
28050 for profiling a function entry. */
28052 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28054 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28059 #ifndef NO_PROFILE_COUNTERS
28060 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28063 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28064 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28066 fprintf (file, "\tcall\t%s\n", mcount_name);
28070 #ifndef NO_PROFILE_COUNTERS
28071 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28074 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28078 #ifndef NO_PROFILE_COUNTERS
28079 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28082 fprintf (file, "\tcall\t%s\n", mcount_name);
28086 /* We don't have exact information about the insn sizes, but we may assume
28087 quite safely that we are informed about all 1 byte insns and memory
28088 address sizes.  This is enough to eliminate unnecessary padding in the vast majority of cases.  */
28092 min_insn_size (rtx insn)
28096 if (!INSN_P (insn) || !active_insn_p (insn))
28099 /* Discard alignments we've emitted, and jump tables. */
28100 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28101 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28103 if (JUMP_TABLE_DATA_P (insn))
28106 /* Important case - calls are always 5 bytes.
28107 It is common to have many calls in a row. */
28109 && symbolic_reference_mentioned_p (PATTERN (insn))
28110 && !SIBLING_CALL_P (insn))
28112 len = get_attr_length (insn);
28116 /* For normal instructions we rely on get_attr_length being exact,
28117 with a few exceptions. */
28118 if (!JUMP_P (insn))
28120 enum attr_type type = get_attr_type (insn);
28125 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28126 || asm_noperands (PATTERN (insn)) >= 0)
28133 /* Otherwise trust get_attr_length. */
28137 l = get_attr_length_address (insn);
28138 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28147 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28149 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window.  */
28153 ix86_avoid_jump_mispredicts (void)
28155 rtx insn, start = get_insns ();
28156 int nbytes = 0, njumps = 0;
28159 /* Look for all minimal intervals of instructions containing 4 jumps.
28160 The intervals are bounded by START and INSN. NBYTES is the total
28161 size of instructions in the interval including INSN and not including
28162 START.  When NBYTES is smaller than 16 bytes, it is possible
28163 that the ends of START and INSN land in the same 16-byte page.
28165 The smallest offset in the page at which INSN can start is the case where
28166 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
28167 We add a p2align to the 16-byte window with max-skip 15 - NBYTES + sizeof (INSN).
28169 for (insn = start; insn; insn = NEXT_INSN (insn))
28173 if (LABEL_P (insn))
28175 int align = label_to_alignment (insn);
28176 int max_skip = label_to_max_skip (insn);
28180 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
28181 already in the current 16-byte page, because otherwise
28182 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28183 bytes to reach the 16-byte boundary. */
28185 || (align <= 3 && max_skip != (1 << align) - 1))
28188 fprintf (dump_file, "Label %i with max_skip %i\n",
28189 INSN_UID (insn), max_skip);
28192 while (nbytes + max_skip >= 16)
28194 start = NEXT_INSN (start);
28195 if ((JUMP_P (start)
28196 && GET_CODE (PATTERN (start)) != ADDR_VEC
28197 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28199 njumps--, isjump = 1;
28202 nbytes -= min_insn_size (start);
28208 min_size = min_insn_size (insn);
28209 nbytes += min_size;
28211 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28212 INSN_UID (insn), min_size);
28214 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28215 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28223 start = NEXT_INSN (start);
28224 if ((JUMP_P (start)
28225 && GET_CODE (PATTERN (start)) != ADDR_VEC
28226 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28228 njumps--, isjump = 1;
28231 nbytes -= min_insn_size (start);
28233 gcc_assert (njumps >= 0);
28235 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28236 INSN_UID (start), INSN_UID (insn), nbytes);
28238 if (njumps == 3 && isjump && nbytes < 16)
28240 int padsize = 15 - nbytes + min_insn_size (insn);
28243 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28244 INSN_UID (insn), padsize);
28245 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
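/* Illustrative sketch, a hypothetical helper rather than compiler code:
   the pass above amounts to a sliding window over the insn stream.
   Given each insn's estimated byte size and whether it counts as a jump
   (or call), this flags the points where a fourth jump could share a
   16-byte window with three earlier ones.  */

static int
four_jumps_in_window_sketch (const int *size, const int *is_jump, int n)
{
  int start = 0, nbytes = 0, njumps = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      nbytes += size[i];
      njumps += is_jump[i];
      while (njumps > 3)	/* shrink the window back to 3 jumps */
	{
	  njumps -= is_jump[start];
	  nbytes -= size[start];
	  start++;
	}
      if (njumps == 3 && is_jump[i] && nbytes < 16)
	return 1;		/* padding would be emitted before insn i */
    }
  return 0;
}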
28251 /* AMD Athlon works faster
28252 when RET is not the destination of a conditional jump and is not directly
28253 preceded by another jump instruction.  We avoid the penalty by inserting a
28254 NOP just before RET instructions in such cases. */
28256 ix86_pad_returns (void)
28261 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28263 basic_block bb = e->src;
28264 rtx ret = BB_END (bb);
28266 bool replace = false;
28268 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28269 || optimize_bb_for_size_p (bb))
28271 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28272 if (active_insn_p (prev) || LABEL_P (prev))
28274 if (prev && LABEL_P (prev))
28279 FOR_EACH_EDGE (e, ei, bb->preds)
28280 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28281 && !(e->flags & EDGE_FALLTHRU))
28286 prev = prev_active_insn (ret);
28288 && ((JUMP_P (prev) && any_condjump_p (prev))
28291 /* Empty functions get a branch mispredict even when the jump destination
28292 is not visible to us. */
28293 if (!prev && !optimize_function_for_size_p (cfun))
28298 emit_jump_insn_before (gen_return_internal_long (), ret);
28304 /* Count the minimum number of instructions in BB. Return 4 if the
28305 number of instructions >= 4. */
28308 ix86_count_insn_bb (basic_block bb)
28311 int insn_count = 0;
28313 /* Count number of instructions in this block. Return 4 if the number
28314 of instructions >= 4. */
28315 FOR_BB_INSNS (bb, insn)
28317 /* This only happens in exit blocks. */
28319 && GET_CODE (PATTERN (insn)) == RETURN)
28322 if (NONDEBUG_INSN_P (insn)
28323 && GET_CODE (PATTERN (insn)) != USE
28324 && GET_CODE (PATTERN (insn)) != CLOBBER)
28327 if (insn_count >= 4)
28336 /* Count the minimum number of instructions in code path in BB.
28337 Return 4 if the number of instructions >= 4. */
28340 ix86_count_insn (basic_block bb)
28344 int min_prev_count;
28346 /* Only bother counting instructions along paths with no
28347 more than 2 basic blocks between entry and exit. Given
28348 that BB has an edge to exit, determine if a predecessor
28349 of BB has an edge from entry. If so, compute the number
28350 of instructions in the predecessor block. If there
28351 happen to be multiple such blocks, compute the minimum. */
28352 min_prev_count = 4;
28353 FOR_EACH_EDGE (e, ei, bb->preds)
28356 edge_iterator prev_ei;
28358 if (e->src == ENTRY_BLOCK_PTR)
28360 min_prev_count = 0;
28363 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28365 if (prev_e->src == ENTRY_BLOCK_PTR)
28367 int count = ix86_count_insn_bb (e->src);
28368 if (count < min_prev_count)
28369 min_prev_count = count;
28375 if (min_prev_count < 4)
28376 min_prev_count += ix86_count_insn_bb (bb);
28378 return min_prev_count;
28381 /* Pad short functions to 4 instructions. */
28384 ix86_pad_short_function (void)
28389 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28391 rtx ret = BB_END (e->src);
28392 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28394 int insn_count = ix86_count_insn (e->src);
28396 /* Pad short function. */
28397 if (insn_count < 4)
28401 /* Find epilogue. */
28404 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28405 insn = PREV_INSN (insn);
28410 /* Two NOPs are counted as one instruction. */
28411 insn_count = 2 * (4 - insn_count);
28412 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
28418 /* Implement machine specific optimizations. We implement padding of returns
28419 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
28423 if (optimize && optimize_function_for_speed_p (cfun))
28425 if (TARGET_PAD_SHORT_FUNCTION)
28426 ix86_pad_short_function ();
28427 else if (TARGET_PAD_RETURNS)
28428 ix86_pad_returns ();
28429 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28430 if (TARGET_FOUR_JUMP_LIMIT)
28431 ix86_avoid_jump_mispredicts ();
28436 /* Return nonzero when a QImode register that must be represented via a REX prefix is used.  */
28439 x86_extended_QIreg_mentioned_p (rtx insn)
28442 extract_insn_cached (insn);
28443 for (i = 0; i < recog_data.n_operands; i++)
28444 if (REG_P (recog_data.operand[i])
28445 && REGNO (recog_data.operand[i]) > BX_REG)
28450 /* Return nonzero when P points to register encoded via REX prefix.
28451 Called via for_each_rtx. */
28453 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28455 unsigned int regno;
28458 regno = REGNO (*p);
28459 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28462 /* Return true when INSN mentions a register that must be encoded using a REX prefix.  */
28465 x86_extended_reg_mentioned_p (rtx insn)
28467 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28468 extended_reg_mentioned_1, NULL);
28471 /* If profitable, negate (without causing overflow) integer constant
28472 of mode MODE at location LOC. Return true in this case. */
28474 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28478 if (!CONST_INT_P (*loc))
28484 /* DImode x86_64 constants must fit in 32 bits. */
28485 gcc_assert (x86_64_immediate_operand (*loc, mode));
28496 gcc_unreachable ();
28499 /* Avoid overflows. */
28500 if (mode_signbit_p (mode, *loc))
28503 val = INTVAL (*loc);
28505 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
28506 Exception: -128 encodes smaller than 128, so swap sign and operation. */
28507 if ((val < 0 && val != -128)
28510 *loc = GEN_INT (-val);
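/* Illustrative sketch of the predicate behind the transformation above:
   negative immediates are negated so that `subl $4,%eax' is emitted
   rather than `addl $-4,%eax'; -128 is kept as-is because it fits a
   signed-byte immediate while +128 does not, and +128 flips to sub for
   the same reason.  */

static int
prefer_negated_immediate_sketch (long val)
{
  return (val < 0 && val != -128) || val == 128;
}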
28517 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28518 optabs would emit if we didn't have TFmode patterns. */
28521 x86_emit_floatuns (rtx operands[2])
28523 rtx neglab, donelab, i0, i1, f0, in, out;
28524 enum machine_mode mode, inmode;
28526 inmode = GET_MODE (operands[1]);
28527 gcc_assert (inmode == SImode || inmode == DImode);
28530 in = force_reg (inmode, operands[1]);
28531 mode = GET_MODE (out);
28532 neglab = gen_label_rtx ();
28533 donelab = gen_label_rtx ();
28534 f0 = gen_reg_rtx (mode);
28536 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28538 expand_float (out, in, 0);
28540 emit_jump_insn (gen_jump (donelab));
28543 emit_label (neglab);
28545 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28547 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28549 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28551 expand_float (f0, i0, 0);
28553 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28555 emit_label (donelab);
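/* Illustrative sketch in plain C of the conversion above, assuming a
   64-bit unsigned input and an available signed conversion.  When the
   sign bit is set, the input is halved with its low bit folded back in
   (i0 = (in >> 1) | (in & 1)) so that the final doubling rounds the
   same way, converted as signed, then doubled.  */

static double
floatuns_sketch (unsigned long long in)
{
  unsigned long long i0;
  double f0;

  if (!(in >> 63))			/* fits the signed range */
    return (double) (long long) in;

  i0 = (in >> 1) | (in & 1);		/* halve, keeping a sticky bit */
  f0 = (double) (long long) i0;
  return f0 + f0;			/* out = f0 + f0 */
}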
28558 /* AVX does not support 32-byte integer vector operations,
28559 thus the longest vector we are faced with is V16QImode. */
28560 #define MAX_VECT_LEN 16
28562 struct expand_vec_perm_d
28564 rtx target, op0, op1;
28565 unsigned char perm[MAX_VECT_LEN];
28566 enum machine_mode vmode;
28567 unsigned char nelt;
28571 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28572 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28574 /* Get a vector mode of the same size as the original but with elements
28575 twice as wide. This is only guaranteed to apply to integral vectors. */
28577 static inline enum machine_mode
28578 get_mode_wider_vector (enum machine_mode o)
28580 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28581 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28582 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28583 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28587 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28588 with all elements equal to VAR. Return true if successful. */
28591 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28592 rtx target, rtx val)
28615 /* First attempt to recognize VAL as-is. */
28616 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28617 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28618 if (recog_memoized (insn) < 0)
28621 /* If that fails, force VAL into a register. */
28624 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28625 seq = get_insns ();
28628 emit_insn_before (seq, insn);
28630 ok = recog_memoized (insn) >= 0;
28639 if (TARGET_SSE || TARGET_3DNOW_A)
28643 val = gen_lowpart (SImode, val);
28644 x = gen_rtx_TRUNCATE (HImode, val);
28645 x = gen_rtx_VEC_DUPLICATE (mode, x);
28646 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28659 struct expand_vec_perm_d dperm;
28663 memset (&dperm, 0, sizeof (dperm));
28664 dperm.target = target;
28665 dperm.vmode = mode;
28666 dperm.nelt = GET_MODE_NUNITS (mode);
28667 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28669 /* Extend to SImode using a paradoxical SUBREG. */
28670 tmp1 = gen_reg_rtx (SImode);
28671 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28673 /* Insert the SImode value as low element of a V4SImode vector. */
28674 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28675 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28677 ok = (expand_vec_perm_1 (&dperm)
28678 || expand_vec_perm_broadcast_1 (&dperm));
28690 /* Replicate the value once into the next wider mode and recurse. */
28692 enum machine_mode smode, wsmode, wvmode;
28695 smode = GET_MODE_INNER (mode);
28696 wvmode = get_mode_wider_vector (mode);
28697 wsmode = GET_MODE_INNER (wvmode);
28699 val = convert_modes (wsmode, smode, val, true);
28700 x = expand_simple_binop (wsmode, ASHIFT, val,
28701 GEN_INT (GET_MODE_BITSIZE (smode)),
28702 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28703 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28705 x = gen_lowpart (wvmode, target);
28706 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28714 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28715 rtx x = gen_reg_rtx (hvmode);
28717 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28720 x = gen_rtx_VEC_CONCAT (mode, x, x);
28721 emit_insn (gen_rtx_SET (VOIDmode, target, x));
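/* Illustrative sketch of the widening step above in scalar terms: to
   splat a QImode value, first build an HImode scalar holding two copies
   of it (shift by the element width, then IOR), then recurse with the
   vector of twice-as-wide elements.  */

static unsigned short
widen_duplicate_sketch (unsigned char val)
{
  /* val = IOR (val << GET_MODE_BITSIZE (QImode), val) in HImode.  */
  return (unsigned short) (((unsigned short) val << 8) | val);
}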
28730 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28731 whose ONE_VAR element is VAR and whose other elements are zero.  Return true if successful.  */
28735 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28736 rtx target, rtx var, int one_var)
28738 enum machine_mode vsimode;
28741 bool use_vector_set = false;
28746 /* For SSE4.1, we normally use vector set. But if the second
28747 element is zero and inter-unit moves are OK, we use movq instead.  */
28749 use_vector_set = (TARGET_64BIT
28751 && !(TARGET_INTER_UNIT_MOVES
28757 use_vector_set = TARGET_SSE4_1;
28760 use_vector_set = TARGET_SSE2;
28763 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28770 use_vector_set = TARGET_AVX;
28773 /* Use ix86_expand_vector_set in 64-bit mode only. */
28774 use_vector_set = TARGET_AVX && TARGET_64BIT;
28780 if (use_vector_set)
28782 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28783 var = force_reg (GET_MODE_INNER (mode), var);
28784 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28800 var = force_reg (GET_MODE_INNER (mode), var);
28801 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28802 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28807 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28808 new_target = gen_reg_rtx (mode);
28810 new_target = target;
28811 var = force_reg (GET_MODE_INNER (mode), var);
28812 x = gen_rtx_VEC_DUPLICATE (mode, var);
28813 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28814 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28817 /* We need to shuffle the value to the correct position, so
28818 create a new pseudo to store the intermediate result. */
28820 /* With SSE2, we can use the integer shuffle insns. */
28821 if (mode != V4SFmode && TARGET_SSE2)
28823 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28825 GEN_INT (one_var == 1 ? 0 : 1),
28826 GEN_INT (one_var == 2 ? 0 : 1),
28827 GEN_INT (one_var == 3 ? 0 : 1)));
28828 if (target != new_target)
28829 emit_move_insn (target, new_target);
28833 /* Otherwise convert the intermediate result to V4SFmode and
28834 use the SSE1 shuffle instructions. */
28835 if (mode != V4SFmode)
28837 tmp = gen_reg_rtx (V4SFmode);
28838 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28843 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28845 GEN_INT (one_var == 1 ? 0 : 1),
28846 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28847 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28849 if (mode != V4SFmode)
28850 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28851 else if (tmp != target)
28852 emit_move_insn (target, tmp);
28854 else if (target != new_target)
28855 emit_move_insn (target, new_target);
28860 vsimode = V4SImode;
28866 vsimode = V2SImode;
28872 /* Zero extend the variable element to SImode and recurse. */
28873 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28875 x = gen_reg_rtx (vsimode);
28876 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28878 gcc_unreachable ();
28880 emit_move_insn (target, gen_lowpart (mode, x));
28888 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28889 consisting of the values in VALS. It is known that all elements
28890 except ONE_VAR are constants. Return true if successful. */
28893 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28894 rtx target, rtx vals, int one_var)
28896 rtx var = XVECEXP (vals, 0, one_var);
28897 enum machine_mode wmode;
28900 const_vec = copy_rtx (vals);
28901 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28902 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28910 /* For the two element vectors, it's just as easy to use
28911 the general case. */
28915 /* Use ix86_expand_vector_set in 64-bit mode only. */
28938 /* There's no way to set one QImode entry easily. Combine
28939 the variable value with its adjacent constant value, and
28940 promote to an HImode set. */
28941 x = XVECEXP (vals, 0, one_var ^ 1);
28944 var = convert_modes (HImode, QImode, var, true);
28945 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28946 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28947 x = GEN_INT (INTVAL (x) & 0xff);
28951 var = convert_modes (HImode, QImode, var, true);
28952 x = gen_int_mode (INTVAL (x) << 8, HImode);
28954 if (x != const0_rtx)
28955 var = expand_simple_binop (HImode, IOR, var, x, var,
28956 1, OPTAB_LIB_WIDEN);
28958 x = gen_reg_rtx (wmode);
28959 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28960 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28962 emit_move_insn (target, gen_lowpart (mode, x));
28969 emit_move_insn (target, const_vec);
28970 ix86_expand_vector_set (mmx_ok, target, var, one_var);
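/* Illustrative sketch of the QImode workaround above: the variable byte
   and its adjacent constant byte (index one_var ^ 1) are packed into a
   single HImode value, which one vec_set can insert.  An odd one_var
   means the variable byte occupies the high half.  */

static unsigned short
qimode_pair_sketch (unsigned char var, unsigned char adjacent,
		    int var_is_high)
{
  if (var_is_high)
    return (unsigned short) (((unsigned short) var << 8) | adjacent);
  return (unsigned short) (((unsigned short) adjacent << 8) | var);
}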
28974 /* A subroutine of ix86_expand_vector_init_general. Use vector
28975 concatenate to handle the most general case: all values variable,
28976 and none identical. */
28979 ix86_expand_vector_init_concat (enum machine_mode mode,
28980 rtx target, rtx *ops, int n)
28982 enum machine_mode cmode, hmode = VOIDmode;
28983 rtx first[8], second[4];
29023 gcc_unreachable ();
29026 if (!register_operand (ops[1], cmode))
29027 ops[1] = force_reg (cmode, ops[1]);
29028 if (!register_operand (ops[0], cmode))
29029 ops[0] = force_reg (cmode, ops[0]);
29030 emit_insn (gen_rtx_SET (VOIDmode, target,
29031 gen_rtx_VEC_CONCAT (mode, ops[0],
29051 gcc_unreachable ();
29067 gcc_unreachable ();
29072 /* FIXME: We process inputs backward to help RA. PR 36222. */
29075 for (; i > 0; i -= 2, j--)
29077 first[j] = gen_reg_rtx (cmode);
29078 v = gen_rtvec (2, ops[i - 1], ops[i]);
29079 ix86_expand_vector_init (false, first[j],
29080 gen_rtx_PARALLEL (cmode, v));
29086 gcc_assert (hmode != VOIDmode);
29087 for (i = j = 0; i < n; i += 2, j++)
29089 second[j] = gen_reg_rtx (hmode);
29090 ix86_expand_vector_init_concat (hmode, second [j],
29094 ix86_expand_vector_init_concat (mode, target, second, n);
29097 ix86_expand_vector_init_concat (mode, target, first, n);
29101 gcc_unreachable ();
29105 /* A subroutine of ix86_expand_vector_init_general. Use vector
29106 interleave to handle the most general case: all values variable,
29107 and none identical. */
29110 ix86_expand_vector_init_interleave (enum machine_mode mode,
29111 rtx target, rtx *ops, int n)
29113 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29116 rtx (*gen_load_even) (rtx, rtx, rtx);
29117 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29118 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29123 gen_load_even = gen_vec_setv8hi;
29124 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29125 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29126 inner_mode = HImode;
29127 first_imode = V4SImode;
29128 second_imode = V2DImode;
29129 third_imode = VOIDmode;
29132 gen_load_even = gen_vec_setv16qi;
29133 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29134 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29135 inner_mode = QImode;
29136 first_imode = V8HImode;
29137 second_imode = V4SImode;
29138 third_imode = V2DImode;
29141 gcc_unreachable ();
29144 for (i = 0; i < n; i++)
29146 /* Extend the odd element to SImode using a paradoxical SUBREG. */
29147 op0 = gen_reg_rtx (SImode);
29148 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29150 /* Insert the SImode value as low element of V4SImode vector. */
29151 op1 = gen_reg_rtx (V4SImode);
29152 op0 = gen_rtx_VEC_MERGE (V4SImode,
29153 gen_rtx_VEC_DUPLICATE (V4SImode,
29155 CONST0_RTX (V4SImode),
29157 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
29159 /* Cast the V4SImode vector back to a vector in the original mode. */
29160 op0 = gen_reg_rtx (mode);
29161 emit_move_insn (op0, gen_lowpart (mode, op1));
29163 /* Load even elements into the second position. */
29164 emit_insn (gen_load_even (op0,
29165 force_reg (inner_mode,
29169 /* Cast vector to FIRST_IMODE vector. */
29170 ops[i] = gen_reg_rtx (first_imode);
29171 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29174 /* Interleave low FIRST_IMODE vectors. */
29175 for (i = j = 0; i < n; i += 2, j++)
29177 op0 = gen_reg_rtx (first_imode);
29178 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29180 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29181 ops[j] = gen_reg_rtx (second_imode);
29182 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29185 /* Interleave low SECOND_IMODE vectors. */
29186 switch (second_imode)
29189 for (i = j = 0; i < n / 2; i += 2, j++)
29191 op0 = gen_reg_rtx (second_imode);
29192 emit_insn (gen_interleave_second_low (op0, ops[i],
29195 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
29197 ops[j] = gen_reg_rtx (third_imode);
29198 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29200 second_imode = V2DImode;
29201 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29205 op0 = gen_reg_rtx (second_imode);
29206 emit_insn (gen_interleave_second_low (op0, ops[0],
29209 /* Cast the SECOND_IMODE vector back to a vector of the original mode.  */
29211 emit_insn (gen_rtx_SET (VOIDmode, target,
29212 gen_lowpart (mode, op0)));
29216 gcc_unreachable ();
29220 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29221 all values variable, and none identical. */
29224 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29225 rtx target, rtx vals)
29227 rtx ops[32], op0, op1;
29228 enum machine_mode half_mode = VOIDmode;
29235 if (!mmx_ok && !TARGET_SSE)
29247 n = GET_MODE_NUNITS (mode);
29248 for (i = 0; i < n; i++)
29249 ops[i] = XVECEXP (vals, 0, i);
29250 ix86_expand_vector_init_concat (mode, target, ops, n);
29254 half_mode = V16QImode;
29258 half_mode = V8HImode;
29262 n = GET_MODE_NUNITS (mode);
29263 for (i = 0; i < n; i++)
29264 ops[i] = XVECEXP (vals, 0, i);
29265 op0 = gen_reg_rtx (half_mode);
29266 op1 = gen_reg_rtx (half_mode);
29267 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29269 ix86_expand_vector_init_interleave (half_mode, op1,
29270 &ops [n >> 1], n >> 2);
29271 emit_insn (gen_rtx_SET (VOIDmode, target,
29272 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29276 if (!TARGET_SSE4_1)
29284 /* Don't use ix86_expand_vector_init_interleave if we can't
29285 move from GPR to SSE register directly. */
29286 if (!TARGET_INTER_UNIT_MOVES)
29289 n = GET_MODE_NUNITS (mode);
29290 for (i = 0; i < n; i++)
29291 ops[i] = XVECEXP (vals, 0, i);
29292 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29300 gcc_unreachable ();
29304 int i, j, n_elts, n_words, n_elt_per_word;
29305 enum machine_mode inner_mode;
29306 rtx words[4], shift;
29308 inner_mode = GET_MODE_INNER (mode);
29309 n_elts = GET_MODE_NUNITS (mode);
29310 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29311 n_elt_per_word = n_elts / n_words;
29312 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29314 for (i = 0; i < n_words; ++i)
29316 rtx word = NULL_RTX;
29318 for (j = 0; j < n_elt_per_word; ++j)
29320 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29321 elt = convert_modes (word_mode, inner_mode, elt, true);
29327 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29328 word, 1, OPTAB_LIB_WIDEN);
29329 word = expand_simple_binop (word_mode, IOR, word, elt,
29330 word, 1, OPTAB_LIB_WIDEN);
29338 emit_move_insn (target, gen_lowpart (mode, words[0]));
29339 else if (n_words == 2)
29341 rtx tmp = gen_reg_rtx (mode);
29342 emit_clobber (tmp);
29343 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29344 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29345 emit_move_insn (target, tmp);
29347 else if (n_words == 4)
29349 rtx tmp = gen_reg_rtx (V4SImode);
29350 gcc_assert (word_mode == SImode);
29351 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29352 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29353 emit_move_insn (target, gen_lowpart (mode, tmp));
29356 gcc_unreachable ();
29360 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29361 instructions unless MMX_OK is true. */
29364 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29366 enum machine_mode mode = GET_MODE (target);
29367 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29368 int n_elts = GET_MODE_NUNITS (mode);
29369 int n_var = 0, one_var = -1;
29370 bool all_same = true, all_const_zero = true;
29374 for (i = 0; i < n_elts; ++i)
29376 x = XVECEXP (vals, 0, i);
29377 if (!(CONST_INT_P (x)
29378 || GET_CODE (x) == CONST_DOUBLE
29379 || GET_CODE (x) == CONST_FIXED))
29380 n_var++, one_var = i;
29381 else if (x != CONST0_RTX (inner_mode))
29382 all_const_zero = false;
29383 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29387 /* Constants are best loaded from the constant pool. */
29390 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29394 /* If all values are identical, broadcast the value. */
29396 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29397 XVECEXP (vals, 0, 0)))
29400 /* Values where only one field is non-constant are best loaded from
29401 the pool and overwritten via move later. */
29405 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29406 XVECEXP (vals, 0, one_var),
29410 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29414 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29418 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29420 enum machine_mode mode = GET_MODE (target);
29421 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29422 enum machine_mode half_mode;
29423 bool use_vec_merge = false;
29425 static rtx (*gen_extract[6][2]) (rtx, rtx)
29427 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29428 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29429 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29430 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29431 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29432 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29434 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29436 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29437 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29438 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29439 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29440 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29441 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29451 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29452 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29454 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29456 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29457 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29463 use_vec_merge = TARGET_SSE4_1;
29471 /* For the two element vectors, we implement a VEC_CONCAT with
29472 the extraction of the other element. */
29474 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29475 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29478 op0 = val, op1 = tmp;
29480 op0 = tmp, op1 = val;
29482 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29483 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29488 use_vec_merge = TARGET_SSE4_1;
29495 use_vec_merge = true;
29499 /* tmp = target = A B C D */
29500 tmp = copy_to_reg (target);
29501 /* target = A A B B */
29502 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29503 /* target = X A B B */
29504 ix86_expand_vector_set (false, target, val, 0);
29505 /* target = A X C D */
29506 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29507 const1_rtx, const0_rtx,
29508 GEN_INT (2+4), GEN_INT (3+4)));
29512 /* tmp = target = A B C D */
29513 tmp = copy_to_reg (target);
29514 /* tmp = X B C D */
29515 ix86_expand_vector_set (false, tmp, val, 0);
29516 /* target = A B X D */
29517 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29518 const0_rtx, const1_rtx,
29519 GEN_INT (0+4), GEN_INT (3+4)));
29523 /* tmp = target = A B C D */
29524 tmp = copy_to_reg (target);
29525 /* tmp = X B C D */
29526 ix86_expand_vector_set (false, tmp, val, 0);
29527 /* target = A B C X */
29528 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29529 const0_rtx, const1_rtx,
29530 GEN_INT (2+4), GEN_INT (0+4)));
29534 gcc_unreachable ();
29539 use_vec_merge = TARGET_SSE4_1;
29543 /* Element 0 handled by vec_merge below. */
29546 use_vec_merge = true;
29552 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29553 store into element 0, then shuffle them back. */
29557 order[0] = GEN_INT (elt);
29558 order[1] = const1_rtx;
29559 order[2] = const2_rtx;
29560 order[3] = GEN_INT (3);
29561 order[elt] = const0_rtx;
29563 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29564 order[1], order[2], order[3]));
29566 ix86_expand_vector_set (false, target, val, 0);
29568 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29569 order[1], order[2], order[3]));
29573 /* For SSE1, we have to reuse the V4SF code. */
29574 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29575 gen_lowpart (SFmode, val), elt);
29580 use_vec_merge = TARGET_SSE2;
29583 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29587 use_vec_merge = TARGET_SSE4_1;
29594 half_mode = V16QImode;
29600 half_mode = V8HImode;
29606 half_mode = V4SImode;
29612 half_mode = V2DImode;
29618 half_mode = V4SFmode;
29624 half_mode = V2DFmode;
29630 /* Compute offset. */
29634 gcc_assert (i <= 1);
29636 /* Extract the half. */
29637 tmp = gen_reg_rtx (half_mode);
29638 emit_insn (gen_extract[j][i] (tmp, target));
29640 /* Put val in tmp at elt. */
29641 ix86_expand_vector_set (false, tmp, val, elt);
29644 emit_insn (gen_insert[j][i] (target, target, tmp));
29653 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29654 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29655 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29659 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29661 emit_move_insn (mem, target);
29663 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29664 emit_move_insn (tmp, val);
29666 emit_move_insn (target, mem);
29671 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29673 enum machine_mode mode = GET_MODE (vec);
29674 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29675 bool use_vec_extr = false;
29688 use_vec_extr = true;
29692 use_vec_extr = TARGET_SSE4_1;
29704 tmp = gen_reg_rtx (mode);
29705 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29706 GEN_INT (elt), GEN_INT (elt),
29707 GEN_INT (elt+4), GEN_INT (elt+4)));
29711 tmp = gen_reg_rtx (mode);
29712 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29716 gcc_unreachable ();
29719 use_vec_extr = true;
29724 use_vec_extr = TARGET_SSE4_1;
29738 tmp = gen_reg_rtx (mode);
29739 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29740 GEN_INT (elt), GEN_INT (elt),
29741 GEN_INT (elt), GEN_INT (elt)));
29745 tmp = gen_reg_rtx (mode);
29746 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29750 gcc_unreachable ();
29753 use_vec_extr = true;
29758 /* For SSE1, we have to reuse the V4SF code. */
29759 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29760 gen_lowpart (V4SFmode, vec), elt);
29766 use_vec_extr = TARGET_SSE2;
29769 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29773 use_vec_extr = TARGET_SSE4_1;
29777 /* ??? Could extract the appropriate HImode element and shift. */
29784 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29785 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29787 /* Let the rtl optimizers know about the zero extension performed. */
29788 if (inner_mode == QImode || inner_mode == HImode)
29790 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29791 target = gen_lowpart (SImode, target);
29794 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29798 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29800 emit_move_insn (mem, vec);
29802 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29803 emit_move_insn (target, tmp);
29807 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29808 pattern to reduce; DEST is the destination; IN is the input vector. */
29811 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29813 rtx tmp1, tmp2, tmp3;
29815 tmp1 = gen_reg_rtx (V4SFmode);
29816 tmp2 = gen_reg_rtx (V4SFmode);
29817 tmp3 = gen_reg_rtx (V4SFmode);
29819 emit_insn (gen_sse_movhlps (tmp1, in, in));
29820 emit_insn (fn (tmp2, tmp1, in));
29822 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29823 const1_rtx, const1_rtx,
29824 GEN_INT (1+4), GEN_INT (1+4)));
29825 emit_insn (fn (dest, tmp2, tmp3));
29828 /* Target hook for scalar_mode_supported_p. */
29830 ix86_scalar_mode_supported_p (enum machine_mode mode)
29832 if (DECIMAL_FLOAT_MODE_P (mode))
29833 return default_decimal_float_supported_p ();
29834 else if (mode == TFmode)
29837 return default_scalar_mode_supported_p (mode);
29840 /* Implements target hook vector_mode_supported_p. */
29842 ix86_vector_mode_supported_p (enum machine_mode mode)
29844 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29846 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29848 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29850 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29852 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29857 /* Target hook for c_mode_for_suffix. */
29858 static enum machine_mode
29859 ix86_c_mode_for_suffix (char suffix)
29869 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29871 We do this in the new i386 backend to maintain source compatibility
29872 with the old cc0-based compiler. */
29875 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29876 tree inputs ATTRIBUTE_UNUSED,
29879 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29881 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29886 /* Implements the target hook targetm.asm.encode_section_info.  This
29887 is not used by NetWare. */
29889 static void ATTRIBUTE_UNUSED
29890 ix86_encode_section_info (tree decl, rtx rtl, int first)
29892 default_encode_section_info (decl, rtl, first);
29894 if (TREE_CODE (decl) == VAR_DECL
29895 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29896 && ix86_in_large_data_p (decl))
29897 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29900 /* Worker function for REVERSE_CONDITION. */
29903 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29905 return (mode != CCFPmode && mode != CCFPUmode
29906 ? reverse_condition (code)
29907 : reverse_condition_maybe_unordered (code));
29910 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0].  */
29914 output_387_reg_move (rtx insn, rtx *operands)
29916 if (REG_P (operands[0]))
29918 if (REG_P (operands[1])
29919 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29921 if (REGNO (operands[0]) == FIRST_STACK_REG)
29922 return output_387_ffreep (operands, 0);
29923 return "fstp\t%y0";
29925 if (STACK_TOP_P (operands[0]))
29926 return "fld%Z1\t%y1";
29929 else if (MEM_P (operands[0]))
29931 gcc_assert (REG_P (operands[1]));
29932 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29933 return "fstp%Z0\t%y0";
29936 /* There is no non-popping store to memory for XFmode.
29937 So if we need one, follow the store with a load. */
29938 if (GET_MODE (operands[0]) == XFmode)
29939 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29941 return "fst%Z0\t%y0";
29948 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
29949 FP status register is set. */
29952 ix86_emit_fp_unordered_jump (rtx label)
29954 rtx reg = gen_reg_rtx (HImode);
29957 emit_insn (gen_x86_fnstsw_1 (reg));
29959 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29961 emit_insn (gen_x86_sahf_1 (reg));
29963 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29964 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29968 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29970 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29971 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29974 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29975 gen_rtx_LABEL_REF (VOIDmode, label),
29977 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29979 emit_jump_insn (temp);
29980 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29983 /* Output code to perform a log1p XFmode calculation. */
29985 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29987 rtx label1 = gen_label_rtx ();
29988 rtx label2 = gen_label_rtx ();
29990 rtx tmp = gen_reg_rtx (XFmode);
29991 rtx tmp2 = gen_reg_rtx (XFmode);
29994 emit_insn (gen_absxf2 (tmp, op1));
29995 test = gen_rtx_GE (VOIDmode, tmp,
29996 CONST_DOUBLE_FROM_REAL_VALUE (
29997 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29999 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
30001 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30002 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
30003 emit_jump (label2);
30005 emit_label (label1);
30006 emit_move_insn (tmp, CONST1_RTX (XFmode));
30007 emit_insn (gen_addxf3 (tmp, op1, tmp));
30008 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30009 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
30011 emit_label (label2);
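/* Illustrative sketch in plain C, using <math.h> stand-ins for the x87
   sequence above.  fyl2xp1 is only accurate for small arguments, hence
   the branch at |x| >= 0.2928... (which is 1 - sqrt(2)/2); past it,
   1 + x is formed explicitly and fyl2x is used.  Both paths compute
   ln(2) * log2(1 + x) = ln(1 + x); the C version cannot show the
   precision fyl2xp1 gains by taking x directly.  */

#include <math.h>

static double
log1p_sketch (double x)
{
  const double ln2 = 0.69314718055994530942;	/* the fldln2 constant */
  double tmp;

  if (fabs (x) < 0.29289321881345247561810596348408353)
    return ln2 * log2 (1.0 + x);	/* fyl2xp1 path */

  tmp = 1.0 + x;			/* the addxf3 above */
  return ln2 * log2 (tmp);		/* fyl2x path */
}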
30014 /* Output code to perform a Newton-Raphson approximation of a single precision
30015 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
30017 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
30019 rtx x0, x1, e0, e1, two;
30021 x0 = gen_reg_rtx (mode);
30022 e0 = gen_reg_rtx (mode);
30023 e1 = gen_reg_rtx (mode);
30024 x1 = gen_reg_rtx (mode);
30026 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30028 if (VECTOR_MODE_P (mode))
30029 two = ix86_build_const_vector (SFmode, true, two);
30031 two = force_reg (mode, two);
30033 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
30035 /* x0 = rcp(b) estimate */
30036 emit_insn (gen_rtx_SET (VOIDmode, x0,
30037 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30040 emit_insn (gen_rtx_SET (VOIDmode, e0,
30041 gen_rtx_MULT (mode, x0, a)));
30043 emit_insn (gen_rtx_SET (VOIDmode, e1,
30044 gen_rtx_MULT (mode, x0, b)));
30046 emit_insn (gen_rtx_SET (VOIDmode, x1,
30047 gen_rtx_MINUS (mode, two, e1)));
30048 /* res = e0 * x1 */
30049 emit_insn (gen_rtx_SET (VOIDmode, res,
30050 gen_rtx_MULT (mode, e0, x1)));
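/* rcpss/rcpps are documented to have a relative error of at most
   1.5 * 2**-12; the single Newton-Raphson step above roughly doubles
   the number of correct bits, which brings the quotient close enough
   to full SFmode precision for the relaxed semantics under which this
   expansion is used.  */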
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, bool recip)
30059 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30062 x0 = gen_reg_rtx (mode);
30063 e0 = gen_reg_rtx (mode);
30064 e1 = gen_reg_rtx (mode);
30065 e2 = gen_reg_rtx (mode);
30066 e3 = gen_reg_rtx (mode);
30068 real_from_integer (&r, VOIDmode, -3, -1, 0);
30069 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30071 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30072 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30074 if (VECTOR_MODE_P (mode))
30076 mthree = ix86_build_const_vector (SFmode, true, mthree);
30077 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
30080 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30081 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
30083 /* x0 = rsqrt(a) estimate */
30084 emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));
  /* If a == 0.0, the rsqrt estimate is infinity; mask it out so that
     sqrt (0.0) yields 0.0 rather than NaN.  */
  if (!recip)
30093 zero = gen_reg_rtx (mode);
30094 mask = gen_reg_rtx (mode);
30096 zero = force_reg (mode, CONST0_RTX(mode));
30097 emit_insn (gen_rtx_SET (VOIDmode, mask,
30098 gen_rtx_NE (mode, zero, a)));
30100 emit_insn (gen_rtx_SET (VOIDmode, x0,
30101 gen_rtx_AND (mode, x0, mask)));
30105 emit_insn (gen_rtx_SET (VOIDmode, e0,
30106 gen_rtx_MULT (mode, x0, a)));
30108 emit_insn (gen_rtx_SET (VOIDmode, e1,
30109 gen_rtx_MULT (mode, e0, x0)));
30112 mthree = force_reg (mode, mthree);
30113 emit_insn (gen_rtx_SET (VOIDmode, e2,
30114 gen_rtx_PLUS (mode, e1, mthree)));
30116 mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
30125 /* ret = e2 * e3 */
30126 emit_insn (gen_rtx_SET (VOIDmode, res,
30127 gen_rtx_MULT (mode, e2, e3)));
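/* As with the reciprocal above, rsqrtss/rsqrtps deliver roughly 12
   correct bits and the single Newton-Raphson step brings the estimate
   to roughly 24 bits, adequate for SFmode under the same relaxed
   semantics.  */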
30130 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30132 static void ATTRIBUTE_UNUSED
30133 i386_solaris_elf_named_section (const char *name, unsigned int flags,
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
30142 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30143 flags & SECTION_WRITE ? "aw" : "a");
30146 default_elf_asm_named_section (name, flags, decl);
30149 /* Return the mangling of TYPE if it is an extended fundamental type. */
30151 static const char *
30152 ix86_mangle_type (const_tree type)
30154 type = TYPE_MAIN_VARIANT (type);
30156 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30157 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30160 switch (TYPE_MODE (type))
30163 /* __float128 is "g". */
30166 /* "long double" or __float80 is "e". */
30173 /* For 32-bit code we can save PIC register setup by using
30174 __stack_chk_fail_local hidden function instead of calling
30175 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
30176 register, so it is better to call __stack_chk_fail directly. */
30179 ix86_stack_protect_fail (void)
30181 return TARGET_64BIT
30182 ? default_external_stack_protect_fail ()
30183 : default_hidden_stack_protect_fail ();
30186 /* Select a format to encode pointers in exception handling data. CODE
30187 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30188 true if the symbol may be affected by dynamic relocations.
30190 ??? All x86 object file formats are capable of representing this.
30191 After all, the relocation needed is the same as for the call insn.
30192 Whether or not a particular assembler allows us to enter such, I
30193 guess we'll have to see. */
30195 asm_preferred_eh_data_format (int code, int global)
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
30202 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30203 type = DW_EH_PE_sdata4;
30204 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30206 if (ix86_cmodel == CM_SMALL
30207 || (ix86_cmodel == CM_MEDIUM && code))
30208 return DW_EH_PE_udata4;
30209 return DW_EH_PE_absptr;
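/* For example, 64-bit PIC code with the small code model gets
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (with DW_EH_PE_indirect added for
   global symbols), while non-PIC small-model code uses plain
   DW_EH_PE_udata4.  */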
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be the mask used to
   mask out the sign bit (as produced by ix86_expand_sse_fabs); it is
   inverted here before being used to extract the sign bit of SIGN.  */
30216 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30218 enum machine_mode mode = GET_MODE (sign);
30219 rtx sgn = gen_reg_rtx (mode);
30220 if (mask == NULL_RTX)
30222 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
30223 if (!VECTOR_MODE_P (mode))
30225 /* We need to generate a scalar mode mask in this case. */
30226 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30227 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30228 mask = gen_reg_rtx (mode);
30229 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
  else
    mask = gen_rtx_NOT (mode, mask);
30234 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30235 gen_rtx_AND (mode, mask, sign)));
30236 emit_insn (gen_rtx_SET (VOIDmode, result,
30237 gen_rtx_IOR (mode, abs_value, sgn)));
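/* In other words, the two insns above compute
     RESULT = ABS_VALUE | (SIGN & sign-bit-mask),
   the usual SSE and/or idiom for copysign when the magnitude is already
   known to be non-negative.  */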
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
30244 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30246 enum machine_mode mode = GET_MODE (op0);
30249 xa = gen_reg_rtx (mode);
30250 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
30251 if (!VECTOR_MODE_P (mode))
30253 /* We need to generate a scalar mode mask in this case. */
30254 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30255 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30256 mask = gen_reg_rtx (mode);
30257 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30259 emit_insn (gen_rtx_SET (VOIDmode, xa,
30260 gen_rtx_AND (mode, op0, mask)));
30268 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30269 swapping the operands if SWAP_OPERANDS is true. The expanded
30270 code is a forward jump to a newly created label in case the
30271 comparison is true. The generated label rtx is returned. */
30273 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30274 bool swap_operands)
30285 label = gen_label_rtx ();
30286 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30287 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30288 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30289 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30290 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30291 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30292 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30293 JUMP_LABEL (tmp) = label;
30298 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30299 using comparison code CODE. Operands are swapped for the comparison if
30300 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30302 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30303 bool swap_operands)
30305 enum machine_mode mode = GET_MODE (op0);
30306 rtx mask = gen_reg_rtx (mode);
30315 if (mode == DFmode)
30316 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30317 gen_rtx_fmt_ee (code, mode, op0, op1)));
30319 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30320 gen_rtx_fmt_ee (code, mode, op0, op1)));
30325 /* Generate and return a rtx of mode MODE for 2**n where n is the number
30326 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
30328 ix86_gen_TWO52 (enum machine_mode mode)
30330 REAL_VALUE_TYPE TWO52r;
30333 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30334 TWO52 = const_double_from_real_value (TWO52r, mode);
30335 TWO52 = force_reg (mode, TWO52);
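/* This constant drives the rounding expanders below: for 0 <= x < 2**52
   (DFmode; 2**23 for SFmode), x + TWO52 - TWO52 rounds x to an integer
   in the current rounding mode, because the ulp of the intermediate sum
   is exactly 1.0.  Values >= 2**52 are already integral, which is why
   each user first branches away on !isless (xa, TWO52).  */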
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
30343 ix86_expand_lround (rtx op0, rtx op1)
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
	return (long) tmp;  */
30349 enum machine_mode mode = GET_MODE (op1);
30350 const struct real_format *fmt;
30351 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30354 /* load nextafter (0.5, 0.0) */
30355 fmt = REAL_MODE_FORMAT (mode);
30356 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30357 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30359 /* adj = copysign (0.5, op1) */
30360 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30361 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30363 /* adj = op1 + adj */
30364 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30366 /* op0 = (imode)adj */
30367 expand_fix (op0, adj, 0);
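/* Using nextafter (0.5, 0.0) instead of 0.5 avoids the classic x + 0.5
   bug: for the largest double below 0.5, adding exactly 0.5 rounds up
   to 1.0 under round-to-nearest-even, and lround would return 1 instead
   of 0.  */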
/* Expand SSE2 sequence for computing lfloor or lceil (depending on
   DO_FLOOR) from OP1, storing into OP0.  */
30373 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long) op1;
	xi -= (double) xi > op1 ? 1 : 0;
	return xi;  */
30380 enum machine_mode fmode = GET_MODE (op1);
30381 enum machine_mode imode = GET_MODE (op0);
30382 rtx ireg, freg, label, tmp;
30384 /* reg = (long)op1 */
30385 ireg = gen_reg_rtx (imode);
30386 expand_fix (ireg, op1, 0);
30388 /* freg = (double)reg */
30389 freg = gen_reg_rtx (fmode);
30390 expand_float (freg, ireg, 0);
30392 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30393 label = ix86_expand_sse_compare_and_jump (UNLE,
30394 freg, op1, !do_floor);
30395 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30396 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30397 emit_move_insn (ireg, tmp);
30399 emit_label (label);
30400 LABEL_NUSES (label) = 1;
30402 emit_move_insn (op0, ireg);
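/* Note how one jump skeleton serves both cases: for lfloor we skip the
   decrement when (double) xi <= op1, while for lceil the UNLE operands
   are swapped and the adjustment becomes an increment.  */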
30405 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30406 result in OPERAND0. */
30408 ix86_expand_rint (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
30417 enum machine_mode mode = GET_MODE (operand0);
30418 rtx res, xa, label, TWO52, mask;
30420 res = gen_reg_rtx (mode);
30421 emit_move_insn (res, operand1);
30423 /* xa = abs (operand1) */
30424 xa = ix86_expand_sse_fabs (res, &mask);
30426 /* if (!isless (xa, TWO52)) goto label; */
30427 TWO52 = ix86_gen_TWO52 (mode);
30428 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30430 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30431 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30433 ix86_sse_copysign_to_positive (res, xa, res, mask);
30435 emit_label (label);
30436 LABEL_NUSES (label) = 1;
30438 emit_move_insn (operand0, res);
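/* Because the TWO52 trick honors the current SSE rounding mode, the
   code above implements rint (round to nearest, ties to even, by
   default) rather than round; inputs with |xa| >= 2**52 are already
   integral and skip directly to the label.  */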
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
30444 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
	Compensate.  Floor: if (x2 > x) x2 -= 1;
	Ceil: if (x2 < x) x2 += 1 (done below by subtracting -1,
	preserving a signed zero).
	return copysign (x2, x);  */
30460 enum machine_mode mode = GET_MODE (operand0);
30461 rtx xa, TWO52, tmp, label, one, res, mask;
30463 TWO52 = ix86_gen_TWO52 (mode);
30465 /* Temporary for holding the result, initialized to the input
30466 operand to ease control flow. */
30467 res = gen_reg_rtx (mode);
30468 emit_move_insn (res, operand1);
30470 /* xa = abs (operand1) */
30471 xa = ix86_expand_sse_fabs (res, &mask);
30473 /* if (!isless (xa, TWO52)) goto label; */
30474 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30476 /* xa = xa + TWO52 - TWO52; */
30477 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30478 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30480 /* xa = copysign (xa, operand1) */
30481 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30483 /* generate 1.0 or -1.0 */
30484 one = force_reg (mode,
30485 const_double_from_real_value (do_floor
30486 ? dconst1 : dconstm1, mode));
30488 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30489 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30490 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30491 gen_rtx_AND (mode, one, tmp)));
30492 /* We always need to subtract here to preserve signed zero. */
30493 tmp = expand_simple_binop (mode, MINUS,
30494 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30495 emit_move_insn (res, tmp);
30497 emit_label (label);
30498 LABEL_NUSES (label) = 1;
30500 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
30506 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double) (long) x;
	Compensate.  Floor: if (x2 > x) x2 -= 1;
	Ceil: if (x2 < x) x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;  */
30523 enum machine_mode mode = GET_MODE (operand0);
30524 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30526 TWO52 = ix86_gen_TWO52 (mode);
30528 /* Temporary for holding the result, initialized to the input
30529 operand to ease control flow. */
30530 res = gen_reg_rtx (mode);
30531 emit_move_insn (res, operand1);
30533 /* xa = abs (operand1) */
30534 xa = ix86_expand_sse_fabs (res, &mask);
30536 /* if (!isless (xa, TWO52)) goto label; */
30537 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30539 /* xa = (double)(long)x */
30540 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30541 expand_fix (xi, res, 0);
30542 expand_float (xa, xi, 0);
30545 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30547 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30548 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30549 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30550 gen_rtx_AND (mode, one, tmp)));
30551 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30552 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30553 emit_move_insn (res, tmp);
30555 if (HONOR_SIGNED_ZEROS (mode))
30556 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30558 emit_label (label);
30559 LABEL_NUSES (label) = 1;
30561 emit_move_insn (operand0, res);
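/* Unlike ix86_expand_floorceildf_32 above, this variant rounds through
   an integer register (expand_fix/expand_float), so for DFmode it
   relies on the DImode cvttsd2siq/cvtsi2sdq conversions that are only
   available on 64-bit targets.  */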
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  This sequence works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
30568 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back the sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;  */
30586 enum machine_mode mode = GET_MODE (operand0);
30587 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30589 TWO52 = ix86_gen_TWO52 (mode);
30591 /* Temporary for holding the result, initialized to the input
30592 operand to ease control flow. */
30593 res = gen_reg_rtx (mode);
30594 emit_move_insn (res, operand1);
30596 /* xa = abs (operand1) */
30597 xa = ix86_expand_sse_fabs (res, &mask);
30599 /* if (!isless (xa, TWO52)) goto label; */
30600 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30602 /* xa2 = xa + TWO52 - TWO52; */
30603 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30604 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30606 /* dxa = xa2 - xa; */
30607 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30609 /* generate 0.5, 1.0 and -0.5 */
30610 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30611 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);
30616 tmp = gen_reg_rtx (mode);
30617 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30618 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30619 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30620 gen_rtx_AND (mode, one, tmp)));
30621 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30622 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30623 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30624 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30625 gen_rtx_AND (mode, one, tmp)));
30626 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30628 /* res = copysign (xa2, operand1) */
30629 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30631 emit_label (label);
30632 LABEL_NUSES (label) = 1;
30634 emit_move_insn (operand0, res);
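/* xa2 above is xa rounded to nearest with ties to even, so
   dxa = xa2 - xa lies in [-0.5, 0.5].  The two mask-and-adjust steps
   convert that into round-half-away-from-zero: a tie rounded down
   (dxa == -0.5) is bumped up by 1; the dxa > 0.5 test mirrors the
   defensive case in the C sketch above.  */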
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
30640 ix86_expand_trunc (rtx operand0, rtx operand1)
  /* C code for the SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double) (long) x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;  */
30651 enum machine_mode mode = GET_MODE (operand0);
30652 rtx xa, xi, TWO52, label, res, mask;
30654 TWO52 = ix86_gen_TWO52 (mode);
30656 /* Temporary for holding the result, initialized to the input
30657 operand to ease control flow. */
30658 res = gen_reg_rtx (mode);
30659 emit_move_insn (res, operand1);
30661 /* xa = abs (operand1) */
30662 xa = ix86_expand_sse_fabs (res, &mask);
30664 /* if (!isless (xa, TWO52)) goto label; */
30665 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30667 /* x = (double)(long)x */
30668 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30669 expand_fix (xi, res, 0);
30670 expand_float (res, xi, 0);
30672 if (HONOR_SIGNED_ZEROS (mode))
30673 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30675 emit_label (label);
30676 LABEL_NUSES (label) = 1;
30678 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
30684 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30686 enum machine_mode mode = GET_MODE (operand0);
30687 rtx xa, mask, TWO52, label, one, res, smask, tmp;
  /* C code for the SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
	Compensate: if (xa2 > xa) xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;  */
30701 TWO52 = ix86_gen_TWO52 (mode);
30703 /* Temporary for holding the result, initialized to the input
30704 operand to ease control flow. */
30705 res = gen_reg_rtx (mode);
30706 emit_move_insn (res, operand1);
30708 /* xa = abs (operand1) */
30709 xa = ix86_expand_sse_fabs (res, &smask);
30711 /* if (!isless (xa, TWO52)) goto label; */
30712 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30714 /* res = xa + TWO52 - TWO52; */
30715 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30716 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30717 emit_move_insn (res, tmp);
30720 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30722 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30723 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30724 emit_insn (gen_rtx_SET (VOIDmode, mask,
30725 gen_rtx_AND (mode, mask, one)));
30726 tmp = expand_simple_binop (mode, MINUS,
30727 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30728 emit_move_insn (res, tmp);
30730 /* res = copysign (res, operand1) */
30731 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30733 emit_label (label);
30734 LABEL_NUSES (label) = 1;
30736 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
30742 ix86_expand_round (rtx operand0, rtx operand1)
30744 /* C code for the stuff we're doing below:
30745 double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double) (long) (xa + nextafter (0.5, 0.0));
	return copysign (xa, x);  */
30751 enum machine_mode mode = GET_MODE (operand0);
30752 rtx res, TWO52, xa, label, xi, half, mask;
30753 const struct real_format *fmt;
30754 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30756 /* Temporary for holding the result, initialized to the input
30757 operand to ease control flow. */
30758 res = gen_reg_rtx (mode);
30759 emit_move_insn (res, operand1);
30761 TWO52 = ix86_gen_TWO52 (mode);
30762 xa = ix86_expand_sse_fabs (res, &mask);
30763 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30765 /* load nextafter (0.5, 0.0) */
30766 fmt = REAL_MODE_FORMAT (mode);
30767 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30768 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30770 /* xa = xa + 0.5 */
30771 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30772 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30774 /* xa = (double)(int64_t)xa */
30775 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30776 expand_fix (xi, xa, 0);
30777 expand_float (xa, xi, 0);
30779 /* res = copysign (xa, operand1) */
30780 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30782 emit_label (label);
30783 LABEL_NUSES (label) = 1;
30785 emit_move_insn (operand0, res);
30789 /* Table of valid machine attributes. */
30790 static const struct attribute_spec ix86_attribute_table[] =
30792 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30793 /* Stdcall attribute says callee is responsible for popping arguments
30794 if they are not variable. */
30795 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30796 /* Fastcall attribute says callee is responsible for popping arguments
30797 if they are not variable. */
30798 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30799 /* Thiscall attribute says callee is responsible for popping arguments
30800 if they are not variable. */
30801 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30802 /* Cdecl attribute says the callee is a normal C declaration */
30803 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30804 /* Regparm attribute specifies how many integer arguments are to be
30805 passed in registers. */
30806 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30807 /* Sseregparm attribute says we are using x86_64 calling conventions
30808 for FP arguments. */
30809 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30810 /* force_align_arg_pointer says this function realigns the stack at entry. */
30811 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30812 false, true, true, ix86_handle_cconv_attribute },
30813 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30814 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30815 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30816 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30818 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30819 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30820 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30821 SUBTARGET_ATTRIBUTE_TABLE,
30823 /* ms_abi and sysv_abi calling convention function attributes. */
30824 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30825 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30826 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30828 { NULL, 0, 0, false, false, false, NULL }
30831 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30833 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30834 tree vectype ATTRIBUTE_UNUSED,
30835 int misalign ATTRIBUTE_UNUSED)
30837 switch (type_of_cost)
30840 return ix86_cost->scalar_stmt_cost;
30843 return ix86_cost->scalar_load_cost;
30846 return ix86_cost->scalar_store_cost;
30849 return ix86_cost->vec_stmt_cost;
30852 return ix86_cost->vec_align_load_cost;
30855 return ix86_cost->vec_store_cost;
30857 case vec_to_scalar:
30858 return ix86_cost->vec_to_scalar_cost;
30860 case scalar_to_vec:
30861 return ix86_cost->scalar_to_vec_cost;
30863 case unaligned_load:
30864 case unaligned_store:
30865 return ix86_cost->vec_unalign_load_cost;
30867 case cond_branch_taken:
30868 return ix86_cost->cond_taken_branch_cost;
30870 case cond_branch_not_taken:
30871 return ix86_cost->cond_not_taken_branch_cost;
30877 gcc_unreachable ();
30882 /* Implement targetm.vectorize.builtin_vec_perm. */
30885 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30887 tree itype = TREE_TYPE (vec_type);
30888 bool u = TYPE_UNSIGNED (itype);
30889 enum machine_mode vmode = TYPE_MODE (vec_type);
30890 enum ix86_builtins fcode;
30891 bool ok = TARGET_SSE2;
30897 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30900 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30902 itype = ix86_get_builtin_type (IX86_BT_DI);
30907 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30911 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30913 itype = ix86_get_builtin_type (IX86_BT_SI);
30917 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30920 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30923 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30926 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30936 *mask_type = itype;
30937 return ix86_builtins[(int) fcode];
30940 /* Return a vector mode with twice as many elements as VMODE. */
30941 /* ??? Consider moving this to a table generated by genmodes.c. */
30943 static enum machine_mode
30944 doublesize_vector_mode (enum machine_mode vmode)
30948 case V2SFmode: return V4SFmode;
30949 case V1DImode: return V2DImode;
30950 case V2SImode: return V4SImode;
30951 case V4HImode: return V8HImode;
30952 case V8QImode: return V16QImode;
30954 case V2DFmode: return V4DFmode;
30955 case V4SFmode: return V8SFmode;
30956 case V2DImode: return V4DImode;
30957 case V4SImode: return V8SImode;
30958 case V8HImode: return V16HImode;
30959 case V16QImode: return V32QImode;
30961 case V4DFmode: return V8DFmode;
30962 case V8SFmode: return V16SFmode;
30963 case V4DImode: return V8DImode;
30964 case V8SImode: return V16SImode;
30965 case V16HImode: return V32HImode;
30966 case V32QImode: return V64QImode;
30969 gcc_unreachable ();
30973 /* Construct (set target (vec_select op0 (parallel perm))) and
30974 return true if that's a valid instruction in the active ISA. */
30977 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30979 rtx rperm[MAX_VECT_LEN], x;
30982 for (i = 0; i < nelt; ++i)
30983 rperm[i] = GEN_INT (perm[i]);
30985 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30986 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30987 x = gen_rtx_SET (VOIDmode, target, x);
30990 if (recog_memoized (x) < 0)
30998 /* Similar, but generate a vec_concat from op0 and op1 as well. */
31001 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
31002 const unsigned char *perm, unsigned nelt)
31004 enum machine_mode v2mode;
31007 v2mode = doublesize_vector_mode (GET_MODE (op0));
31008 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
31009 return expand_vselect (target, x, perm, nelt);
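/* For example (an illustrative call, not one made verbatim elsewhere),
   interleaving the low halves of two V4SImode registers is

     static const unsigned char lo[4] = { 0, 4, 1, 5 };
     expand_vselect_vconcat (target, op0, op1, lo, 4);

   which recog then matches as punpckldq.  */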
31012 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31013 in terms of blendp[sd] / pblendw / pblendvb. */
31016 expand_vec_perm_blend (struct expand_vec_perm_d *d)
31018 enum machine_mode vmode = d->vmode;
31019 unsigned i, mask, nelt = d->nelt;
31020 rtx target, op0, op1, x;
31022 if (!TARGET_SSE4_1 || d->op0 == d->op1)
31024 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
31027 /* This is a blend, not a permute. Elements must stay in their
31028 respective lanes. */
31029 for (i = 0; i < nelt; ++i)
31031 unsigned e = d->perm[i];
31032 if (!(e == i || e == i + nelt))
31039 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31040 decision should be extracted elsewhere, so that we only try that
31041 sequence once all budget==3 options have been tried. */
31043 /* For bytes, see if bytes move in pairs so we can use pblendw with
31044 an immediate argument, rather than pblendvb with a vector argument. */
31045 if (vmode == V16QImode)
31047 bool pblendw_ok = true;
31048 for (i = 0; i < 16 && pblendw_ok; i += 2)
31049 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31053 rtx rperm[16], vperm;
31055 for (i = 0; i < nelt; ++i)
31056 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31058 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31059 vperm = force_reg (V16QImode, vperm);
31061 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31066 target = d->target;
31078 for (i = 0; i < nelt; ++i)
31079 mask |= (d->perm[i] >= nelt) << i;
31083 for (i = 0; i < 2; ++i)
31084 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31088 for (i = 0; i < 4; ++i)
31089 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31093 for (i = 0; i < 8; ++i)
31094 mask |= (d->perm[i * 2] >= 16) << i;
31098 target = gen_lowpart (vmode, target);
31099 op0 = gen_lowpart (vmode, op0);
31100 op1 = gen_lowpart (vmode, op1);
31104 gcc_unreachable ();
31107 /* This matches five different patterns with the different modes. */
31108 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31109 x = gen_rtx_SET (VOIDmode, target, x);
31115 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31116 in terms of the variable form of vpermilps.
31118 Note that we will have already failed the immediate input vpermilps,
31119 which requires that the high and low part shuffle be identical; the
31120 variable form doesn't require that. */
31123 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31125 rtx rperm[8], vperm;
31128 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31131 /* We can only permute within the 128-bit lane. */
31132 for (i = 0; i < 8; ++i)
31134 unsigned e = d->perm[i];
31135 if (i < 4 ? e >= 4 : e < 4)
31142 for (i = 0; i < 8; ++i)
31144 unsigned e = d->perm[i];
31146 /* Within each 128-bit lane, the elements of op0 are numbered
31147 from 0 and the elements of op1 are numbered from 4. */
31153 rperm[i] = GEN_INT (e);
31156 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31157 vperm = force_reg (V8SImode, vperm);
31158 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31163 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31164 in terms of pshufb or vpperm. */
31167 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31169 unsigned i, nelt, eltsz;
31170 rtx rperm[16], vperm, target, op0, op1;
31172 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31174 if (GET_MODE_SIZE (d->vmode) != 16)
31181 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31183 for (i = 0; i < nelt; ++i)
31185 unsigned j, e = d->perm[i];
31186 for (j = 0; j < eltsz; ++j)
31187 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31190 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31191 vperm = force_reg (V16QImode, vperm);
31193 target = gen_lowpart (V16QImode, d->target);
31194 op0 = gen_lowpart (V16QImode, d->op0);
31195 if (d->op0 == d->op1)
31196 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31199 op1 = gen_lowpart (V16QImode, d->op1);
31200 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
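/* E.g. a V4SImode permutation { 2, 0, 3, 1 } expands each element index
   into eltsz == 4 consecutive byte indexes, giving the byte selector
   { 8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7 } that is fed to
   pshufb/vpperm.  */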
31206 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31207 in a single instruction. */
31210 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31212 unsigned i, nelt = d->nelt;
31213 unsigned char perm2[MAX_VECT_LEN];
31215 /* Check plain VEC_SELECT first, because AVX has instructions that could
31216 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31217 input where SEL+CONCAT may not. */
31218 if (d->op0 == d->op1)
31220 int mask = nelt - 1;
31222 for (i = 0; i < nelt; i++)
31223 perm2[i] = d->perm[i] & mask;
31225 if (expand_vselect (d->target, d->op0, perm2, nelt))
31228 /* There are plenty of patterns in sse.md that are written for
31229 SEL+CONCAT and are not replicated for a single op. Perhaps
31230 that should be changed, to avoid the nastiness here. */
31232 /* Recognize interleave style patterns, which means incrementing
31233 every other permutation operand. */
31234 for (i = 0; i < nelt; i += 2)
31236 perm2[i] = d->perm[i] & mask;
31237 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31239 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31242 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31245 for (i = 0; i < nelt; i += 4)
31247 perm2[i + 0] = d->perm[i + 0] & mask;
31248 perm2[i + 1] = d->perm[i + 1] & mask;
31249 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31250 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31253 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31258 /* Finally, try the fully general two operand permute. */
31259 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31262 /* Recognize interleave style patterns with reversed operands. */
31263 if (d->op0 != d->op1)
31265 for (i = 0; i < nelt; ++i)
31267 unsigned e = d->perm[i];
31275 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31279 /* Try the SSE4.1 blend variable merge instructions. */
31280 if (expand_vec_perm_blend (d))
31283 /* Try one of the AVX vpermil variable permutations. */
31284 if (expand_vec_perm_vpermil (d))
31287 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31288 if (expand_vec_perm_pshufb (d))
31294 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31295 in terms of a pair of pshuflw + pshufhw instructions. */
31298 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31300 unsigned char perm2[MAX_VECT_LEN];
31304 if (d->vmode != V8HImode || d->op0 != d->op1)
31307 /* The two permutations only operate in 64-bit lanes. */
31308 for (i = 0; i < 4; ++i)
31309 if (d->perm[i] >= 4)
31311 for (i = 4; i < 8; ++i)
31312 if (d->perm[i] < 4)
31318 /* Emit the pshuflw. */
31319 memcpy (perm2, d->perm, 4);
31320 for (i = 4; i < 8; ++i)
31322 ok = expand_vselect (d->target, d->op0, perm2, 8);
31325 /* Emit the pshufhw. */
31326 memcpy (perm2 + 4, d->perm + 4, 4);
31327 for (i = 0; i < 4; ++i)
31329 ok = expand_vselect (d->target, d->target, perm2, 8);
31335 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31336 the permutation using the SSSE3 palignr instruction. This succeeds
31337 when all of the elements in PERM fit within one vector and we merely
31338 need to shift them down so that a single vector permutation has a
31339 chance to succeed. */
31342 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31344 unsigned i, nelt = d->nelt;
31349 /* Even with AVX, palignr only operates on 128-bit vectors. */
31350 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31353 min = nelt, max = 0;
31354 for (i = 0; i < nelt; ++i)
31356 unsigned e = d->perm[i];
31362 if (min == 0 || max - min >= nelt)
31365 /* Given that we have SSSE3, we know we'll be able to implement the
31366 single operand permutation after the palignr with pshufb. */
31370 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31371 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31372 gen_lowpart (TImode, d->op1),
31373 gen_lowpart (TImode, d->op0), shift));
31375 d->op0 = d->op1 = d->target;
31378 for (i = 0; i < nelt; ++i)
31380 unsigned e = d->perm[i] - min;
31386 /* Test for the degenerate case where the alignment by itself
31387 produces the desired permutation. */
31391 ok = expand_vec_perm_1 (d);
31397 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31398 a two vector permutation into a single vector permutation by using
31399 an interleave operation to merge the vectors. */
31402 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31404 struct expand_vec_perm_d dremap, dfinal;
31405 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31406 unsigned contents, h1, h2, h3, h4;
31407 unsigned char remap[2 * MAX_VECT_LEN];
31411 if (d->op0 == d->op1)
31414 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31415 lanes. We can use similar techniques with the vperm2f128 instruction,
31416 but it requires slightly different logic. */
31417 if (GET_MODE_SIZE (d->vmode) != 16)
31420 /* Examine from whence the elements come. */
31422 for (i = 0; i < nelt; ++i)
31423 contents |= 1u << d->perm[i];
31425 /* Split the two input vectors into 4 halves. */
31426 h1 = (1u << nelt2) - 1;
31431 memset (remap, 0xff, sizeof (remap));
  /* If all elements come from the low halves, use interleave low; if they
     all come from the high halves, use interleave high.  If the elements
     are from mis-matched halves, we can use shufps for V4SF/V4SI or do a
     DImode shuffle.  */
31437 if ((contents & (h1 | h3)) == contents)
31439 for (i = 0; i < nelt2; ++i)
31442 remap[i + nelt] = i * 2 + 1;
31443 dremap.perm[i * 2] = i;
31444 dremap.perm[i * 2 + 1] = i + nelt;
31447 else if ((contents & (h2 | h4)) == contents)
31449 for (i = 0; i < nelt2; ++i)
31451 remap[i + nelt2] = i * 2;
31452 remap[i + nelt + nelt2] = i * 2 + 1;
31453 dremap.perm[i * 2] = i + nelt2;
31454 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31457 else if ((contents & (h1 | h4)) == contents)
31459 for (i = 0; i < nelt2; ++i)
31462 remap[i + nelt + nelt2] = i + nelt2;
31463 dremap.perm[i] = i;
31464 dremap.perm[i + nelt2] = i + nelt + nelt2;
31468 dremap.vmode = V2DImode;
31470 dremap.perm[0] = 0;
31471 dremap.perm[1] = 3;
31474 else if ((contents & (h2 | h3)) == contents)
31476 for (i = 0; i < nelt2; ++i)
31478 remap[i + nelt2] = i;
31479 remap[i + nelt] = i + nelt2;
31480 dremap.perm[i] = i + nelt2;
31481 dremap.perm[i + nelt2] = i + nelt;
31485 dremap.vmode = V2DImode;
31487 dremap.perm[0] = 1;
31488 dremap.perm[1] = 2;
31494 /* Use the remapping array set up above to move the elements from their
31495 swizzled locations into their final destinations. */
31497 for (i = 0; i < nelt; ++i)
31499 unsigned e = remap[d->perm[i]];
31500 gcc_assert (e < nelt);
31501 dfinal.perm[i] = e;
31503 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31504 dfinal.op1 = dfinal.op0;
31505 dremap.target = dfinal.op0;
31507 /* Test if the final remap can be done with a single insn. For V4SFmode or
31508 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31510 ok = expand_vec_perm_1 (&dfinal);
31511 seq = get_insns ();
31517 if (dremap.vmode != dfinal.vmode)
31519 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31520 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31521 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31524 ok = expand_vec_perm_1 (&dremap);
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two-instruction sequences.  */
31536 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31538 rtx rperm[2][16], vperm, l, h, op, m128;
31539 unsigned int i, nelt, eltsz;
31541 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31543 gcc_assert (d->op0 != d->op1);
31546 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31548 /* Generate two permutation masks. If the required element is within
31549 the given vector it is shuffled into the proper lane. If the required
31550 element is in the other vector, force a zero into the lane by setting
31551 bit 7 in the permutation mask. */
31552 m128 = GEN_INT (-128);
31553 for (i = 0; i < nelt; ++i)
31555 unsigned j, e = d->perm[i];
31556 unsigned which = (e >= nelt);
31560 for (j = 0; j < eltsz; ++j)
31562 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31563 rperm[1-which][i*eltsz + j] = m128;
31567 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31568 vperm = force_reg (V16QImode, vperm);
31570 l = gen_reg_rtx (V16QImode);
31571 op = gen_lowpart (V16QImode, d->op0);
31572 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31574 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31575 vperm = force_reg (V16QImode, vperm);
31577 h = gen_reg_rtx (V16QImode);
31578 op = gen_lowpart (V16QImode, d->op1);
31579 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31581 op = gen_lowpart (V16QImode, d->target);
31582 emit_insn (gen_iorv16qi3 (op, l, h));
31587 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31588 and extract-odd permutations. */
31591 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31593 rtx t1, t2, t3, t4;
31598 t1 = gen_reg_rtx (V4DFmode);
31599 t2 = gen_reg_rtx (V4DFmode);
31601 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31602 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31603 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31605 /* Now an unpck[lh]pd will produce the result required. */
31607 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31609 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
	static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
31616 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
31617 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
31619 t1 = gen_reg_rtx (V8SFmode);
31620 t2 = gen_reg_rtx (V8SFmode);
31621 t3 = gen_reg_rtx (V8SFmode);
31622 t4 = gen_reg_rtx (V8SFmode);
31624 /* Shuffle within the 128-bit lanes to produce:
31625 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
31626 expand_vselect (t1, d->op0, perm1, 8);
31627 expand_vselect (t2, d->op1, perm1, 8);
31629 /* Shuffle the lanes around to produce:
31630 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
31631 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
31632 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
31634 /* Now a vpermil2p will produce the result required. */
  /* ??? The vpermil2p requires a vector constant.  Another option
     is an unpck[lh]ps to merge the two vectors to produce
     { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }.  Then use another
     vpermilps to get the elements into the final order.  */
31641 memcpy (d->perm, odd ? permo: perme, 8);
31642 expand_vec_perm_vpermil (d);
31650 /* These are always directly implementable by expand_vec_perm_1. */
31651 gcc_unreachable ();
31655 return expand_vec_perm_pshufb2 (d);
31658 /* We need 2*log2(N)-1 operations to achieve odd/even
31659 with interleave. */
31660 t1 = gen_reg_rtx (V8HImode);
31661 t2 = gen_reg_rtx (V8HImode);
31662 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31663 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31664 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31665 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31667 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31669 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31676 return expand_vec_perm_pshufb2 (d);
31679 t1 = gen_reg_rtx (V16QImode);
31680 t2 = gen_reg_rtx (V16QImode);
31681 t3 = gen_reg_rtx (V16QImode);
31682 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31683 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31684 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31685 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31686 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31687 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31689 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31691 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31697 gcc_unreachable ();
31703 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31704 extract-even and extract-odd permutations. */
31707 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31709 unsigned i, odd, nelt = d->nelt;
31712 if (odd != 0 && odd != 1)
31715 for (i = 1; i < nelt; ++i)
31716 if (d->perm[i] != 2 * i + odd)
31719 return expand_vec_perm_even_odd_1 (d, odd);
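/* E.g. for V4SImode, d->perm == { 0, 2, 4, 6 } requests the even
   elements of the concatenated inputs and { 1, 3, 5, 7 } the odd ones;
   the vectorizer generates these when splitting interleaved (stride-2)
   accesses.  */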
31722 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31723 permutations. We assume that expand_vec_perm_1 has already failed. */
31726 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31728 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31729 enum machine_mode vmode = d->vmode;
31730 unsigned char perm2[4];
31738 /* These are special-cased in sse.md so that we can optionally
31739 use the vbroadcast instruction. They expand to two insns
31740 if the input happens to be in a register. */
31741 gcc_unreachable ();
31747 /* These are always implementable using standard shuffle patterns. */
31748 gcc_unreachable ();
31752 /* These can be implemented via interleave. We save one insn by
31753 stopping once we have promoted to V4SImode and then use pshufd. */
31756 optab otab = vec_interleave_low_optab;
31760 otab = vec_interleave_high_optab;
31765 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31766 vmode = get_mode_wider_vector (vmode);
31767 op0 = gen_lowpart (vmode, op0);
31769 while (vmode != V4SImode);
31771 memset (perm2, elt, 4);
31772 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31777 gcc_unreachable ();
31781 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31782 broadcast permutations. */
31785 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31787 unsigned i, elt, nelt = d->nelt;
31789 if (d->op0 != d->op1)
31793 for (i = 1; i < nelt; ++i)
31794 if (d->perm[i] != elt)
31797 return expand_vec_perm_broadcast_1 (d);
31800 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31801 With all of the interface bits taken care of, perform the expansion
31802 in D and return true on success. */
31805 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31807 /* Try a single instruction expansion. */
31808 if (expand_vec_perm_1 (d))
31811 /* Try sequences of two instructions. */
31813 if (expand_vec_perm_pshuflw_pshufhw (d))
31816 if (expand_vec_perm_palignr (d))
31819 if (expand_vec_perm_interleave2 (d))
31822 if (expand_vec_perm_broadcast (d))
31825 /* Try sequences of three instructions. */
31827 if (expand_vec_perm_pshufb2 (d))
31830 /* ??? Look for narrow permutations whose element orderings would
31831 allow the promotion to a wider mode. */
31833 /* ??? Look for sequences of interleave or a wider permute that place
31834 the data into the correct lanes for a half-vector shuffle like
31835 pshuf[lh]w or vpermilps. */
31837 /* ??? Look for sequences of interleave that produce the desired results.
31838 The combinatorics of punpck[lh] get pretty ugly... */
31840 if (expand_vec_perm_even_odd (d))
31846 /* Extract the values from the vector CST into the permutation array in D.
31847 Return 0 on error, 1 if all values from the permutation come from the
31848 first vector, 2 if all values from the second vector, and 3 otherwise. */
31851 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31853 tree list = TREE_VECTOR_CST_ELTS (cst);
31854 unsigned i, nelt = d->nelt;
31857 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31859 unsigned HOST_WIDE_INT e;
31861 if (!host_integerp (TREE_VALUE (list), 1))
31863 e = tree_low_cst (TREE_VALUE (list), 1);
31867 ret |= (e < nelt ? 1 : 2);
31870 gcc_assert (list == NULL);
31872 /* For all elements from second vector, fold the elements to first. */
31874 for (i = 0; i < nelt; ++i)
31875 d->perm[i] -= nelt;
31881 ix86_expand_vec_perm_builtin (tree exp)
31883 struct expand_vec_perm_d d;
31884 tree arg0, arg1, arg2;
31886 arg0 = CALL_EXPR_ARG (exp, 0);
31887 arg1 = CALL_EXPR_ARG (exp, 1);
31888 arg2 = CALL_EXPR_ARG (exp, 2);
31890 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31891 d.nelt = GET_MODE_NUNITS (d.vmode);
31892 d.testing_p = false;
31893 gcc_assert (VECTOR_MODE_P (d.vmode));
31895 if (TREE_CODE (arg2) != VECTOR_CST)
31897 error_at (EXPR_LOCATION (exp),
31898 "vector permutation requires vector constant");
31902 switch (extract_vec_perm_cst (&d, arg2))
31908 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31912 if (!operand_equal_p (arg0, arg1, 0))
31914 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31915 d.op0 = force_reg (d.vmode, d.op0);
31916 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31917 d.op1 = force_reg (d.vmode, d.op1);
  /* The elements of PERM do not suggest that only the first operand
     is used, but both operands are identical.  Allow easier matching
     of the permutation by folding the permutation into the single
     input vector.  */
31926 unsigned i, nelt = d.nelt;
31927 for (i = 0; i < nelt; ++i)
31928 if (d.perm[i] >= nelt)
31934 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31935 d.op0 = force_reg (d.vmode, d.op0);
31940 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31941 d.op0 = force_reg (d.vmode, d.op0);
31946 d.target = gen_reg_rtx (d.vmode);
31947 if (ix86_expand_vec_perm_builtin_1 (&d))
  /* For compiler-generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin the user has access to, don't abort; issue a sorry instead.  */
31956 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31959 sorry ("vector permutation (%d %d %d %d)",
31960 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31963 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31964 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31965 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31968 sorry ("vector permutation "
31969 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31970 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31971 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31972 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31973 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31976 gcc_unreachable ();
31979 return CONST0_RTX (d.vmode);
31982 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31985 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31987 struct expand_vec_perm_d d;
31991 d.vmode = TYPE_MODE (vec_type);
31992 d.nelt = GET_MODE_NUNITS (d.vmode);
31993 d.testing_p = true;
31995 /* Given sufficient ISA support we can just return true here
31996 for selected vector modes. */
31997 if (GET_MODE_SIZE (d.vmode) == 16)
31999 /* All implementable with a single vpperm insn. */
32002 /* All implementable with 2 pshufb + 1 ior. */
32005 /* All implementable with shufpd or unpck[lh]pd. */
32010 vec_mask = extract_vec_perm_cst (&d, mask);
  /* This hook cannot be called in response to something that the
     user does (unlike the builtin expander) so we shouldn't ever see
     an error generated from the extract.  */
32015 gcc_assert (vec_mask > 0 && vec_mask <= 3);
32016 one_vec = (vec_mask != 3);
32018 /* Implementable with shufps or pshufd. */
32019 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
32022 /* Otherwise we have to go through the motions and see if we can
32023 figure out how to generate the requested permutation. */
32024 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32025 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32027 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32030 ret = ix86_expand_vec_perm_builtin_1 (&d);
32037 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32039 struct expand_vec_perm_d d;
32045 d.vmode = GET_MODE (targ);
32046 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32047 d.testing_p = false;
32049 for (i = 0; i < nelt; ++i)
32050 d.perm[i] = i * 2 + odd;
32052 /* We'll either be able to implement the permutation directly... */
32053 if (expand_vec_perm_1 (&d))
32056 /* ... or we use the special-case patterns. */
32057 expand_vec_perm_even_odd_1 (&d, odd);
/* This function returns the calling-ABI-specific va_list type node.
   It returns the FNDECL-specific va_list type.  */
32064 ix86_fn_abi_va_list (tree fndecl)
32067 return va_list_type_node;
32068 gcc_assert (fndecl != NULL_TREE);
32070 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32071 return ms_va_list_type_node;
32073 return sysv_va_list_type_node;
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */
32080 ix86_canonical_va_list_type (tree type)
32084 /* Resolve references and pointers to va_list type. */
32085 if (TREE_CODE (type) == MEM_REF)
32086 type = TREE_TYPE (type);
32087 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32088 type = TREE_TYPE (type);
32089 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32090 type = TREE_TYPE (type);
32094 wtype = va_list_type_node;
32095 gcc_assert (wtype != NULL_TREE);
32097 if (TREE_CODE (wtype) == ARRAY_TYPE)
32099 /* If va_list is an array type, the argument may have decayed
32100 to a pointer type, e.g. by being passed to another function.
32101 In that case, unwrap both types so that we can compare the
32102 underlying records. */
32103 if (TREE_CODE (htype) == ARRAY_TYPE
32104 || POINTER_TYPE_P (htype))
32106 wtype = TREE_TYPE (wtype);
32107 htype = TREE_TYPE (htype);
32110 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32111 return va_list_type_node;
32112 wtype = sysv_va_list_type_node;
32113 gcc_assert (wtype != NULL_TREE);
32115 if (TREE_CODE (wtype) == ARRAY_TYPE)
32117 /* If va_list is an array type, the argument may have decayed
32118 to a pointer type, e.g. by being passed to another function.
32119 In that case, unwrap both types so that we can compare the
32120 underlying records. */
32121 if (TREE_CODE (htype) == ARRAY_TYPE
32122 || POINTER_TYPE_P (htype))
32124 wtype = TREE_TYPE (wtype);
32125 htype = TREE_TYPE (htype);
32128 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32129 return sysv_va_list_type_node;
32130 wtype = ms_va_list_type_node;
32131 gcc_assert (wtype != NULL_TREE);
32133 if (TREE_CODE (wtype) == ARRAY_TYPE)
32135 /* If va_list is an array type, the argument may have decayed
32136 to a pointer type, e.g. by being passed to another function.
32137 In that case, unwrap both types so that we can compare the
32138 underlying records. */
32139 if (TREE_CODE (htype) == ARRAY_TYPE
32140 || POINTER_TYPE_P (htype))
32142 wtype = TREE_TYPE (wtype);
32143 htype = TREE_TYPE (htype);
32146 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32147 return ms_va_list_type_node;
32150 return std_canonical_va_list_type (type);
32153 /* Iterate through the target-specific builtin types for va_list.
32154 IDX denotes the iterator, *PTREE is set to the result type of
32155 the va_list builtin, and *PNAME to its internal type.
32156 Returns zero if there is no element for this index, otherwise
32157 IDX should be increased upon the next call.
32158 Note, do not iterate a base builtin's name like __builtin_va_list.
32159 Used from c_common_nodes_and_builtins. */
32162 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32172 *ptree = ms_va_list_type_node;
32173 *pname = "__builtin_ms_va_list";
32177 *ptree = sysv_va_list_type_node;
32178 *pname = "__builtin_sysv_va_list";
32186 #undef TARGET_SCHED_DISPATCH
32187 #define TARGET_SCHED_DISPATCH has_dispatch
32188 #undef TARGET_SCHED_DISPATCH_DO
32189 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32191 /* The size of the dispatch window is the total number of bytes of
32192 object code allowed in a window. */
32193 #define DISPATCH_WINDOW_SIZE 16
32195 /* Number of dispatch windows considered for scheduling. */
32196 #define MAX_DISPATCH_WINDOWS 3
32198 /* Maximum number of instructions in a window. */
32201 /* Maximum number of immediate operands in a window. */
32204 /* Maximum number of immediate bits allowed in a window. */
32205 #define MAX_IMM_SIZE 128
32207 /* Maximum number of 32 bit immediates allowed in a window. */
32208 #define MAX_IMM_32 4
32210 /* Maximum number of 64 bit immediates allowed in a window. */
32211 #define MAX_IMM_64 2
32213 /* Maximum total of loads or prefetches allowed in a window. */
32216 /* Maximum total of stores allowed in a window. */
32217 #define MAX_STORE 1
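/* Taken together, these limits model the decode/dispatch constraints of
   the AMD Bulldozer family, for which the dispatch scheduler below is
   enabled: a window holds at most 16 bytes of object code, at most
   4 32-bit or 2 64-bit immediates (128 immediate bits in total), and at
   most one store.  */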
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
32224 enum dispatch_group {
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in a dispatch window, but we need them for other reasons in
   a window.  */
32244 static unsigned int num_allowable_groups[disp_last] = {
32245 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32248 char group_name[disp_last + 1][16] = {
32249 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32250 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32251 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
/* Instruction path.  */
enum insn_path {
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi   /* Instructions with more than 2 micro ops.  */
};
/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;
/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s {
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, either 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;
/* Immediate values used in an insn.  */
typedef struct imm_info_s {
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();

  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
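/* Example (editorial): for an insn carrying one 32-bit and one 64-bit
   immediate, this sets *IMM = 2, *IMM32 = 1, *IMM64 = 1 and returns
   1*4 + 1*8 = 12 bytes.  */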
/* This function indicates if an operand of an instruction is an
   immediate.  */

static int
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return 0;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
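/* Editorial sketch of how the result is consumed (see
   fits_dispatch_window below): returning BIG is a way of saying
   "never schedulable into this window", since BIG exceeds every entry
   of num_allowable_groups:

     if (count_num_restricted (insn, window_list)
	 > num_allowable_groups[group])
       reject the insn for this window;
*/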
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in the Haifa scheduler, to make sure they will
     be scheduled in the same dispatch window as the reference to
     them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
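/* Worked example (editorial): windows 0 and 1 together may hold at
   most 48 bytes.  With 16 bytes in each window the sum is 32 and
   window 1 is treated as full; likewise a 9-byte insn trips the
   "window 1 full" path when the sum is already 40, since
   9 + 40 >= 48.  */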
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceeds the allowable limit, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached, do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
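/* Editorial trace of the common case: single-uop insns fill window 0
   until it reaches MAX_INSN uops; the next insn flips window_num
   (~0 & 1 == 1) and goes to window 1; once the pair holds 32 bytes,
   or a branch ends the basic block, process_end_window () checks the
   48-byte invariant and resets both windows.  */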
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
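/* Editorial usage note: the DEBUG_FUNCTION printers above are intended
   to be called by hand from a debugger while cc1 is stopped inside
   the scheduler, e.g. under gdb:

     (gdb) call debug_dispatch_window (0)
     (gdb) call debug_ready_dispatch ()
*/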
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
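/* Editorial note: flag_dispatch_scheduler corresponds to the
   -mdispatch-scheduler command-line option, so dispatch windowing
   only runs when tuning for bdver1 with that option given, e.g.
   (assuming a compiler configured for x86):

     gcc -O2 -march=bdver1 -mdispatch-scheduler file.c
*/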
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static unsigned int
ix86_units_per_simd_word (enum machine_mode mode)
{
  /* Disable double precision vectorizer if needed.  */
  if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
    return UNITS_PER_WORD;

#if 0
  /* FIXME: AVX has 32-byte floating point vector operations and
     16-byte integer vector operations.  But the vectorizer doesn't
     support different sizes for integer and floating point vectors.
     We limit vector size to 16 bytes.  */
  if (TARGET_AVX)
    return (mode == DFmode || mode == SFmode) ? 32 : 16;
  else
#endif
    return TARGET_SSE ? 16 : UNITS_PER_WORD;
}
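/* Example results (editorial): with SSE enabled the hook returns 16,
   so the vectorizer builds 16-byte vectors (V4SF, V2DF, V4SI, ...);
   without SSE, or for DFmode when double vectorization is disabled,
   it falls back to UNITS_PER_WORD (4 on ia32, 8 on x86-64).  */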
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
33023 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
33025 #undef TARGET_INIT_BUILTINS
33026 #define TARGET_INIT_BUILTINS ix86_init_builtins
33027 #undef TARGET_BUILTIN_DECL
33028 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33029 #undef TARGET_EXPAND_BUILTIN
33030 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33032 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33033 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33034 ix86_builtin_vectorized_function
33036 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33037 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33039 #undef TARGET_BUILTIN_RECIPROCAL
33040 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33042 #undef TARGET_ASM_FUNCTION_EPILOGUE
33043 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33045 #undef TARGET_ENCODE_SECTION_INFO
33046 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33047 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33049 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
#undef TARGET_OPTION_OPTIMIZATION
#define TARGET_OPTION_OPTIMIZATION ix86_option_optimization

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_SUPPORTS_SPLIT_STACK
#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
#define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD \
  ix86_units_per_simd_word

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"