/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
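
/* Illustrative sketch (not part of the original file): MODE_INDEX indexes
   the 5-entry per-mode arrays in processor_costs, with slot 4 catching
   every mode wider than DImode.  The field name mult_init follows the
   processor_costs layout initialized below; the helper itself is
   hypothetical.  */
#if 0
static int
example_mult_init_cost (const struct processor_costs *cost,
                        enum machine_mode mode)
{
  /* QImode->0, HImode->1, SImode->2, DImode->3, anything else->4.  */
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif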

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
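
/* A minimal sanity sketch (not part of the original file), assuming
   COSTS_N_INSNS (N) == (N) * 4 as the comment above states: a 2-byte add
   then costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the
   size-tuned and speed-tuned tables share one scale.  */
#if 0
extern void abort (void);
static void
example_cost_scale_check (void)
{
  if (COSTS_N_BYTES (2) != COSTS_N_INSNS (1))
    abort ();
}
#endif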

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),          /* cost of an add instruction */
  COSTS_N_BYTES (3),          /* cost of a lea instruction */
  COSTS_N_BYTES (2),          /* variable shift costs */
  COSTS_N_BYTES (3),          /* constant shift costs */
  {COSTS_N_BYTES (3),         /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),         /* HI */
   COSTS_N_BYTES (3),         /* SI */
   COSTS_N_BYTES (3),         /* DI */
   COSTS_N_BYTES (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),         /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),         /* HI */
   COSTS_N_BYTES (3),         /* SI */
   COSTS_N_BYTES (3),         /* DI */
   COSTS_N_BYTES (5)},        /* other */
  COSTS_N_BYTES (3),          /* cost of movsx */
  COSTS_N_BYTES (3),          /* cost of movzx */
  2,                          /* cost for loading QImode using movzbl */
  {2, 2, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 2, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 2},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {2, 2, 2},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  3,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {3, 3},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  3,                          /* cost of moving SSE register */
  {3, 3, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {3, 3, 3},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  0,                          /* size of l1 cache */
  0,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_BYTES (2),          /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),          /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),          /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),          /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),          /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  1,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  1,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};
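
/* Illustrative sketch (not part of the original file): the byte-based
   table above is meant for code tuned for size, while the per-processor
   tables below are for speed.  A hypothetical selector, assuming the
   caller already knows whether it is optimizing the current spot for
   size:  */
#if 0
static const struct processor_costs *
example_current_costs (bool for_size)
{
  /* ix86_cost points at the table of the selected -mtune target.  */
  return for_size ? &ix86_size_cost : ix86_cost;
}
#endif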

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (3),          /* variable shift costs */
  COSTS_N_INSNS (2),          /* constant shift costs */
  {COSTS_N_INSNS (6),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),         /* HI */
   COSTS_N_INSNS (6),         /* SI */
   COSTS_N_INSNS (6),         /* DI */
   COSTS_N_INSNS (6)},        /* other */
  COSTS_N_INSNS (1),          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),        /* HI */
   COSTS_N_INSNS (23),        /* SI */
   COSTS_N_INSNS (23),        /* DI */
   COSTS_N_INSNS (23)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  15,                         /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {8, 8, 8},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {8, 8, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  0,                          /* size of l1 cache */
  0,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (23),         /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),         /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),         /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),        /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (3),          /* variable shift costs */
  COSTS_N_INSNS (2),          /* constant shift costs */
  {COSTS_N_INSNS (12),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),        /* HI */
   COSTS_N_INSNS (12),        /* SI */
   COSTS_N_INSNS (12),        /* DI */
   COSTS_N_INSNS (12)},       /* other */
  1,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),        /* HI */
   COSTS_N_INSNS (40),        /* SI */
   COSTS_N_INSNS (40),        /* DI */
   COSTS_N_INSNS (40)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  15,                         /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {8, 8, 8},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {8, 8, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  4,                          /* size of l1 cache.  486 has 8kB cache
                                 shared for code and data, so 4kB is
                                 not really precise.  */
  4,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (8),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),         /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (4),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (11),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),        /* HI */
   COSTS_N_INSNS (11),        /* SI */
   COSTS_N_INSNS (11),        /* DI */
   COSTS_N_INSNS (11)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),        /* HI */
   COSTS_N_INSNS (25),        /* SI */
   COSTS_N_INSNS (25),        /* DI */
   COSTS_N_INSNS (25)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  8,                          /* "large" insn */
  6,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  8,                          /* cost of moving MMX register */
  {8, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {8, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  8,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (3),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),         /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (4),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (4),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (4)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),        /* HI */
   COSTS_N_INSNS (17),        /* SI */
   COSTS_N_INSNS (17),        /* DI */
   COSTS_N_INSNS (17)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 2, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {2, 2, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  256,                        /* size of l2 cache */
  32,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  COSTS_N_INSNS (3),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),         /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};
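
/* Illustrative sketch (not part of the original file) of how the
   {max, alg} pairs in tables like the memcpy/memset entries just above
   are meant to be read: scan the pairs until one whose max covers the
   block size (max == -1 terminates), falling back to the table's default
   algorithm when the size is not known.  This mirrors, in simplified
   hypothetical form, the decide_alg logic later in this file; field names
   assume the stringop_algs layout from i386.h.  */
#if 0
static enum stringop_alg
example_pick_stringop_alg (const struct stringop_algs *algs,
                           HOST_WIDE_INT size)
{
  int i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || size <= algs->size[i].max)
      return algs->size[i].alg;
  return algs->unknown_size;
}
#endif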

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (2),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (7),         /* SI */
   COSTS_N_INSNS (7),         /* DI */
   COSTS_N_INSNS (7)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),        /* HI */
   COSTS_N_INSNS (39),        /* SI */
   COSTS_N_INSNS (39),        /* DI */
   COSTS_N_INSNS (39)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  1,                          /* cost for loading QImode using movzbl */
  {1, 1, 1},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {1, 1, 1},                  /* cost of storing integer registers */
  1,                          /* cost of reg,reg fld/fst */
  {1, 1, 1},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 6, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */

  1,                          /* cost of moving MMX register */
  {1, 1},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {1, 1},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  1,                          /* cost of moving SSE register */
  {1, 1, 1},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {1, 1, 1},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  1,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache.  */
  128,                        /* size of l2 cache.  */
  32,                         /* size of prefetch block */
  1,                          /* number of parallel prefetches */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),         /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (3),         /* DI */
   COSTS_N_INSNS (3)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),        /* HI */
   COSTS_N_INSNS (18),        /* SI */
   COSTS_N_INSNS (18),        /* DI */
   COSTS_N_INSNS (18)},       /* other */
  COSTS_N_INSNS (2),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  8,                          /* "large" insn */
  3,                          /* cost for loading QImode using movzbl */
  {4, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 3, 2},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {6, 6, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 4},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {2, 2, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  6,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache.  */
  32,                         /* size of l2 cache.  Some models
                                 have integrated l2 cache, but
                                 optimizing for k6 is not important
                                 enough to worry about that.  */
  32,                         /* size of prefetch block */
  1,                          /* number of parallel prefetches */
  COSTS_N_INSNS (2),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),         /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (5),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),         /* HI */
   COSTS_N_INSNS (5),         /* SI */
   COSTS_N_INSNS (5),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 4},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 6},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache.  */
  256,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8.  Alignment becomes important after 8 bytes for memcpy
     and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 3, 6},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache.  */
  512,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                        /* number of parallel prefetches */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall
     can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost.  */
  2,                          /* scalar load_cost.  */
  2,                          /* scalar_store_cost.  */
  5,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  2,                          /* vec_align_load_cost.  */
  3,                          /* vec_unalign_load_cost.  */
  3,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  2,                          /* cond_not_taken_branch_cost.  */
};
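
/* Illustrative sketch (not part of the original file): the "number of
   parallel prefetches" slot caps how many prefetch insns an optimizer
   should keep in flight, which is why the AMD tables here use a large
   constant rather than no limit at all.  The field name
   simultaneous_prefetches is assumed from the processor_costs layout in
   i386.h; the helper is hypothetical.  */
#if 0
static int
example_prefetch_budget (const struct processor_costs *cost, int wanted)
{
  return (wanted < cost->simultaneous_prefetches
          ? wanted : cost->simultaneous_prefetches);
}
#endif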

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
                              /* On K8:
                                  MOVD reg64, xmmreg Double FSTORE 4
                                  MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                  MOVD reg64, xmmreg Double FADD 3
                                                     1/1  1/1
                                  MOVD reg32, xmmreg Double FADD 3
                                                     1/1  1/1 */
  64,                         /* size of l1 cache.  */
  512,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                        /* number of parallel prefetches */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost.  */
  2,                          /* scalar load_cost.  */
  2,                          /* scalar_store_cost.  */
  6,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  2,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  2,                          /* vec_store_cost.  */
  2,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
                              /* On K8:
                                  MOVD reg64, xmmreg Double FSTORE 4
                                  MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                  MOVD reg64, xmmreg Double FADD 3
                                                     1/1  1/1
                                  MOVD reg32, xmmreg Double FADD 3
                                                     1/1  1/1 */
  64,                         /* size of l1 cache.  */
  1024,                       /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                        /* number of parallel prefetches */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost.  */
  2,                          /* scalar load_cost.  */
  2,                          /* scalar_store_cost.  */
  6,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  2,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  2,                          /* vec_store_cost.  */
  2,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (3),          /* cost of a lea instruction */
  COSTS_N_INSNS (4),          /* variable shift costs */
  COSTS_N_INSNS (4),          /* constant shift costs */
  {COSTS_N_INSNS (15),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),        /* HI */
   COSTS_N_INSNS (15),        /* SI */
   COSTS_N_INSNS (15),        /* DI */
   COSTS_N_INSNS (15)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),        /* HI */
   COSTS_N_INSNS (56),        /* SI */
   COSTS_N_INSNS (56),        /* DI */
   COSTS_N_INSNS (56)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  16,                         /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {4, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 3, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  12,                         /* cost of moving SSE register */
  {12, 12, 12},               /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  10,                         /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  256,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  COSTS_N_INSNS (5),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),         /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (10),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),        /* HI */
   COSTS_N_INSNS (10),        /* SI */
   COSTS_N_INSNS (10),        /* DI */
   COSTS_N_INSNS (10)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),        /* HI */
   COSTS_N_INSNS (66),        /* SI */
   COSTS_N_INSNS (66),        /* DI */
   COSTS_N_INSNS (66)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  16,                         /* "large" insn */
  17,                         /* MOVE_RATIO */
  4,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  3,                          /* cost of reg,reg fld/fst */
  {12, 12, 12},               /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 4},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  6,                          /* cost of moving MMX register */
  {12, 12},                   /* cost of loading MMX registers
                                 in SImode and DImode */
  {12, 12},                   /* cost of storing MMX registers
                                 in SImode and DImode */
  6,                          /* cost of moving SSE register */
  {12, 12, 12},               /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {12, 12, 12},               /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  8,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  1024,                       /* size of l2 cache.  */
  128,                        /* size of prefetch block */
  8,                          /* number of parallel prefetches */
  1,                          /* Branch cost */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),         /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,      /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (3),         /* DI */
   COSTS_N_INSNS (3)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),        /* HI */
   COSTS_N_INSNS (22),        /* SI */
   COSTS_N_INSNS (22),        /* DI */
   COSTS_N_INSNS (22)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  16,                         /* MOVE_RATIO */
  2,                          /* cost for loading QImode using movzbl */
  {6, 6, 6},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {6, 6, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 4},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {6, 6},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {6, 6, 6},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 4},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  2,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache.  */
  2048,                       /* size of l2 cache.  */
  128,                        /* size of prefetch block */
  8,                          /* number of parallel prefetches */
  3,                          /* Branch cost */
  COSTS_N_INSNS (3),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),         /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,      /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (2)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  17,                         /* MOVE_RATIO */
  2,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {12, 12, 12},               /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {8, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {8, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {8, 8, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {8, 8, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache.  */
  256,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  3,                          /* Branch cost */
  COSTS_N_INSNS (8),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),         /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing a regression on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,      /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (2)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  17,                         /* MOVE_RATIO */
  4,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {12, 12, 12},               /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {8, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {8, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {8, 8, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {8, 8, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache.  */
  512,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,                          /* Branch cost */
  COSTS_N_INSNS (8),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),         /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,      /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (2)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  17,                         /* MOVE_RATIO */
  4,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {12, 12, 12},               /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {8, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {8, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {8, 8, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {8, 8, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache.  */
  256,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  3,                          /* Branch cost */
  COSTS_N_INSNS (8),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),         /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
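
/* Illustrative sketch (not part of the original file): ix86_cost is the
   tuning-selected table the cost hooks consult; e.g. an rtx-cost hook
   would charge an addition as ix86_cost->add.  The helper below is
   hypothetical; add and lea are the first two fields initialized in every
   table above.  */
#if 0
static int
example_add_vs_lea (void)
{
  /* Negative when lea is cheaper than add on the selected tuning.  */
  return ix86_cost->lea - ix86_cost->add;
}
#endif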

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
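
/* Illustrative sketch (not part of the original file) of how these
   bitmasks are consumed: each entry of initial_ix86_tune_features below is
   ANDed with the 1<<processor bit of the active tuning to fill
   ix86_tune_features.  This mirrors, in simplified hypothetical form, the
   initialization done by this file's option-override code; "tune" stands
   for the selected processor.  */
#if 0
static void
example_fill_tune_features (enum processor_type tune)
{
  unsigned int mask = 1 << tune;
  int i;
  for (i = 0; i < X86_TUNE_LAST; i++)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & mask);
}
#endif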

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  They also increase the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict between PPro/Pentium4-based chips, which treat 128-bit SSE
1509 registers as single units, and K8-based chips, which split SSE
1510 registers into two 64-bit halves. This knob promotes all store
1511 destinations to 128 bits to allow register renaming on 128-bit SSE
1512 units, but usually costs one extra micro-op on 64-bit SSE units.
1513 Experimental results show that disabling this option on the P4 brings
1514 over a 20% SPECfp regression, while enabling it on K8 brings roughly a
1515 2.4% regression that can be partly masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we
1530 may maintain just the lower part of scalar values in the proper
1531 format, leaving the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in the 16 byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1593 HImode and SImode multiplies, but the 386 and 486 do HImode faster. */
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory
1597 takes the vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector
1601 path on AMD machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1605 than via a MOV of the constant. */
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1613 operand that cannot be represented using a modRM byte. The XOR
1614 replacement is long decoded, so this split helps here as well. */
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618 from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635 instructions. */
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
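/* A minimal sketch of how the two feature arrays are consumed once
   ix86_option_override_internal has expanded the masks into booleans.
   The accessor-macro shape is an assumption modeled on the TARGET_*
   macros in i386.h; illustrative only.  */
#if 0
#define EXAMPLE_TARGET_CMOVE	 (ix86_arch_features[X86_ARCH_CMOVE])
#define EXAMPLE_TARGET_USE_LEAVE (ix86_tune_features[X86_TUNE_USE_LEAVE])
#endif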
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* In case the average insn count for a single function invocation is
1672 lower than this constant, emit a fast (but longer) prologue and
1673 epilogue. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1676 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative
1781 numbers:
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
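/* A minimal sketch of what the maps encode: indexing by the gcc hard
   register number yields the debugger's number, so %esp (gcc regno 7)
   becomes DWARF regno 4 under the SVR4 numbering above.  The real
   lookup goes through the DBX_REGISTER_NUMBER machinery in the target
   headers; this helper is illustrative only.  */
#if 0
static int
example_svr4_dwarf_regno (int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];	/* 7 (%esp) -> 4 */
}
#endif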
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
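/* Illustrative only: under the SysV x86-64 ABI the first six integer
   arguments are passed in the registers listed above, in order, and an
   integer result comes back in the first return register.  So for a
   hypothetical function:  */
#if 0
extern long example (long a, long b, long c);
/* a arrives in %rdi (DI_REG), b in %rsi (SI_REG), c in %rdx (DX_REG),
   and the result is returned in %rax (AX_REG).  */
#endif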
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1835 saved static chain if ix86_static_chain_on_stack
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1844 <- sse_regs_save_offset
1847 [va_arg registers] |
1851 [padding2] | = to_allocate
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which CPU we are scheduling for. */
1886 enum attr_cpu ix86_schedule;
1888 /* Which CPU we are optimizing for. */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* True if the SSE prefetch instruction is not a NOP. */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 static const char ix86_force_align_arg_pointer_string[]
1902 = "force_align_arg_pointer";
1904 static rtx (*ix86_gen_leave) (void);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1918 /* Alignment for incoming stack boundary in bits specified on
1919 the command line. */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1928 /* The abi used by target. */
1929 enum calling_abi ix86_abi;
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1934 /* Calling abi specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1941 int ix86_section_threshold = 65536;
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1947 /* Fence to use after loop using movnt. */
1950 /* Register class used for passing a given 64-bit part of an argument.
1951 These represent classes as documented by the psABI, with the exception
1952 of the SSESF and SSEDF classes, which are basically the SSE class: gcc
1953 just uses SFmode or DFmode moves instead of DImode to avoid
1955 reformatting penalties. Similarly we play games with INTEGERSI_CLASS
1956 to use cheaper SImode moves whenever possible (upper half is padding). */
1957 enum x86_64_reg_class
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1972 #define MAX_CLASSES 4
1974 /* Table of constants used by fldpi, fldln2, etc. */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1990 static void predict_jump (int);
1991 static unsigned int split_stack_prologue_scratch_regno (void);
1992 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1994 enum ix86_function_specific_strings
1996 IX86_FUNCTION_SPECIFIC_ARCH,
1997 IX86_FUNCTION_SPECIFIC_TUNE,
1998 IX86_FUNCTION_SPECIFIC_FPMATH,
1999 IX86_FUNCTION_SPECIFIC_MAX
2002 static char *ix86_target_string (int, int, const char *, const char *,
2003 const char *, bool);
2004 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2005 static void ix86_function_specific_save (struct cl_target_option *);
2006 static void ix86_function_specific_restore (struct cl_target_option *);
2007 static void ix86_function_specific_print (FILE *, int,
2008 struct cl_target_option *);
2009 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2010 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2011 static bool ix86_can_inline_p (tree, tree);
2012 static void ix86_set_current_function (tree);
2013 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2015 static enum calling_abi ix86_function_abi (const_tree);
2018 #ifndef SUBTARGET32_DEFAULT_CPU
2019 #define SUBTARGET32_DEFAULT_CPU "i386"
2022 /* The svr4 ABI for the i386 says that records and unions are returned
2023 in memory. */
2024 #ifndef DEFAULT_PCC_STRUCT_RETURN
2025 #define DEFAULT_PCC_STRUCT_RETURN 1
2028 /* Whether -mtune= or -march= were specified */
2029 static int ix86_tune_defaulted;
2030 static int ix86_arch_specified;
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2059 /* SSE4 includes both SSE4.1 and SSE4.2; -msse4 should be the same
2060 as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2073 /* AES and PCLMUL need SSE2 because they use xmm registers */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
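/* A minimal sketch of the cascade these *_SET masks implement:
   enabling a later ISA pulls in everything it depends on, so a single
   OR is enough.  For example, -msse3 expands to SSE3, SSE2 and SSE:  */
#if 0
ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
/* == OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE */
#endif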
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET)
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2121 /* SSE4 includes both SSE4.1 and SSE4.2; -mno-sse4 should be the same
2122 as -mno-sse4.1. */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
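/* The *_UNSET masks chain the other way: disabling an early ISA also
   clears everything built on top of it.  E.g. -mno-sse2 clears SSE2
   and, transitively, SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A, AVX, FMA,
   FMA4, XOP and F16C (illustrative sketch, not part of the build):  */
#if 0
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
#endif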
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2152 /* Processor target table, indexed by processor number */
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
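/* A minimal sketch of how the table is consumed: it is indexed by the
   PROCESSOR_* value chosen for -mtune, as the option-override code
   below does for the cost table and the alignment defaults.  */
#if 0
ix86_cost = processor_target_table[ix86_tune].cost;
align_loops = processor_target_table[ix86_tune].align_loop;
#endif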
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2210 /* Return true if a red-zone is in use. */
2213 ix86_using_red_zone (void)
2215 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2218 /* Implement TARGET_HANDLE_OPTION. */
2221 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2228 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2257 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2262 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2270 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2275 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2288 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2301 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2309 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2314 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2322 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2327 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2335 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2340 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2348 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2353 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2364 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2376 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2384 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2389 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2397 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2402 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2410 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2415 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2423 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2428 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2436 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2441 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2449 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2454 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2462 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2467 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2475 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2480 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2488 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2493 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2501 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2506 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2514 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2519 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2527 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2532 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2540 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2545 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2553 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2558 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2559 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
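/* Illustrative sketch of why every handler above also records its bits
   in ix86_isa_flags_explicit: the -march implications applied later
   are guarded by that mask, so an explicit user choice wins.  E.g.
   "-march=core2 -mno-ssse3" keeps SSSE3 off because the PTA_SSSE3
   implication only fires when the bit was not set explicitly:  */
#if 0
if (processor_alias_table[i].flags & PTA_SSSE3
    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
#endif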
2568 /* Return a string that documents the current -m options. The caller is
2569 responsible for freeing the string. */
2572 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2573 const char *fpmath, bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 int mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-m64", OPTION_MASK_ISA_64BIT },
2586 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2587 { "-mfma", OPTION_MASK_ISA_FMA },
2588 { "-mxop", OPTION_MASK_ISA_XOP },
2589 { "-mlwp", OPTION_MASK_ISA_LWP },
2590 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2591 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2592 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2593 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2594 { "-msse3", OPTION_MASK_ISA_SSE3 },
2595 { "-msse2", OPTION_MASK_ISA_SSE2 },
2596 { "-msse", OPTION_MASK_ISA_SSE },
2597 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2598 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2599 { "-mmmx", OPTION_MASK_ISA_MMX },
2600 { "-mabm", OPTION_MASK_ISA_ABM },
2601 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2602 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2603 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2604 { "-maes", OPTION_MASK_ISA_AES },
2605 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2606 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2607 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2608 { "-mf16c", OPTION_MASK_ISA_F16C },
2612 static struct ix86_target_opts flag_opts[] =
2614 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2615 { "-m80387", MASK_80387 },
2616 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2617 { "-malign-double", MASK_ALIGN_DOUBLE },
2618 { "-mcld", MASK_CLD },
2619 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2620 { "-mieee-fp", MASK_IEEE_FP },
2621 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2622 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2623 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2624 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2625 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2626 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2627 { "-mno-red-zone", MASK_NO_RED_ZONE },
2628 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2629 { "-mrecip", MASK_RECIP },
2630 { "-mrtd", MASK_RTD },
2631 { "-msseregparm", MASK_SSEREGPARM },
2632 { "-mstack-arg-probe", MASK_STACK_PROBE },
2633 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2634 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2641 char target_other[40];
2650 memset (opts, '\0', sizeof (opts));
2652 /* Add -march= option. */
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2659 /* Add -mtune= option. */
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2666 /* Pick out the ISA options. */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2669 if ((isa & isa_opts[i].mask) != 0)
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2676 if (isa && add_nl_p)
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2685 if ((flags & flag_opts[i].mask) != 0)
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2692 if (flags && add_nl_p)
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2698 /* Add -fpmath= option. */
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2709 gcc_assert (num < ARRAY_SIZE (opts));
2711 /* Size the string. */
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2717 for (j = 0; j < 2; j++)
2719 len += strlen (opts[i][j]);
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2726 for (i = 0; i < num; i++)
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2746 for (j = 0; j < 2; j++)
2749 memcpy (ptr, opts[i][j], len2[j]);
2751 line_len += len2[j];
2756 gcc_assert (ret + len >= ptr);
2761 /* Return TRUE if software prefetching is beneficial for the
2762 CPU we are tuning for. */
2765 software_prefetching_beneficial_p (void)
2769 case PROCESSOR_GEODE:
2771 case PROCESSOR_ATHLON:
2773 case PROCESSOR_AMDFAM10:
2781 /* Return true if profiling code should be emitted before the
2782 prologue, false otherwise.
2783 Note: for x86 with the "hotfix" prologue it is rejected via sorry (). */
2785 ix86_profile_before_prologue (void)
2787 return flag_fentry != 0;
2790 /* Function that is callable from the debugger to print the current
2791 options. */
2793 ix86_debug_options (void)
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2801 fprintf (stderr, "%s\n\n", opts);
2805 fputs ("<no options>\n\n", stderr);
2810 /* Override various settings based on options. If MAIN_ARGS_P, the
2811 options are from the command line, otherwise they are from
2812 attribute(target). */
2815 ix86_option_override_internal (bool main_args_p)
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2833 PTA_PREFETCH_SSE = 1 << 4,
2835 PTA_3DNOW_A = 1 << 6,
2839 PTA_POPCNT = 1 << 10,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2846 PTA_PCLMUL = 1 << 17,
2849 PTA_MOVBE = 1 << 20,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2865 const processor_alias_table[] =
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX |PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
2957 /* Set up prefix/suffix so the error messages refer to either the command
2958 line argument or the attribute(target). */
2967 prefix = "option(\"";
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990 as -mtune=generic. With native compilers we won't see
2991 -mtune=native, since the driver will have rewritten it. */
2992 || !strcmp (ix86_tune_string, "native"))
2995 ix86_tune_string = "generic64";
2997 ix86_tune_string = "generic32";
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3030 ix86_tune_string = "generic64";
3032 ix86_tune_string = "generic32";
3036 if (ix86_stringop_string)
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3062 ix86_arch_specified = 1;
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3076 ix86_abi = DEFAULT_ABI;
3078 if (ix86_cmodel_string != 0)
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3107 ix86_cmodel = CM_32;
3109 if (ix86_asm_string != 0)
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3238 if (ix86_tune_defaulted)
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3249 error ("CPU you selected does not support x86-64 "
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs; so, we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3275 /* Set the default values for switches whose default depends on TARGET_64BIT
3276 in case they weren't overridden by command-line options. */
3281 if (flag_omit_frame_pointer == 2)
3282 flag_omit_frame_pointer = 1;
3283 if (flag_asynchronous_unwind_tables == 2)
3284 flag_asynchronous_unwind_tables = 1;
3285 if (flag_pcc_struct_return == 2)
3286 flag_pcc_struct_return = 0;
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3294 if (flag_asynchronous_unwind_tables == 2)
3295 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3296 if (flag_pcc_struct_return == 2)
3297 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3301 ix86_cost = &ix86_size_cost;
3303 ix86_cost = processor_target_table[ix86_tune].cost;
3305 /* Arrange to set up i386_stack_locals for all functions. */
3306 init_machine_status = ix86_init_machine_status;
3308 /* Validate -mregparm= value. */
3309 if (ix86_regparm_string)
3312 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3313 i = atoi (ix86_regparm_string);
3314 if (i < 0 || i > REGPARM_MAX)
3315 error ("%sregparm=%d%s is not between 0 and %d",
3316 prefix, i, suffix, REGPARM_MAX);
3321 ix86_regparm = REGPARM_MAX;
3323 /* If the user has provided any of the -malign-* options,
3324 warn and use that value only if -falign-* is not set.
3325 Remove this code in GCC 3.2 or later. */
3326 if (ix86_align_loops_string)
3328 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3329 prefix, suffix, suffix);
3330 if (align_loops == 0)
3332 i = atoi (ix86_align_loops_string);
3333 if (i < 0 || i > MAX_CODE_ALIGN)
3334 error ("%salign-loops=%d%s is not between 0 and %d",
3335 prefix, i, suffix, MAX_CODE_ALIGN);
3337 align_loops = 1 << i;
3341 if (ix86_align_jumps_string)
3343 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3344 prefix, suffix, suffix);
3345 if (align_jumps == 0)
3347 i = atoi (ix86_align_jumps_string);
3348 if (i < 0 || i > MAX_CODE_ALIGN)
3349 error ("%salign-loops=%d%s is not between 0 and %d",
3350 prefix, i, suffix, MAX_CODE_ALIGN);
3352 align_jumps = 1 << i;
3356 if (ix86_align_funcs_string)
3358 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3359 prefix, suffix, suffix);
3360 if (align_functions == 0)
3362 i = atoi (ix86_align_funcs_string);
3363 if (i < 0 || i > MAX_CODE_ALIGN)
3364 error ("%salign-loops=%d%s is not between 0 and %d",
3365 prefix, i, suffix, MAX_CODE_ALIGN);
3367 align_functions = 1 << i;
3371 /* Default align_* from the processor table. */
3372 if (align_loops == 0)
3374 align_loops = processor_target_table[ix86_tune].align_loop;
3375 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3377 if (align_jumps == 0)
3379 align_jumps = processor_target_table[ix86_tune].align_jump;
3380 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3382 if (align_functions == 0)
3384 align_functions = processor_target_table[ix86_tune].align_func;
3387 /* Validate -mbranch-cost= value, or provide default. */
3388 ix86_branch_cost = ix86_cost->branch_cost;
3389 if (ix86_branch_cost_string)
3391 i = atoi (ix86_branch_cost_string);
3393 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3395 ix86_branch_cost = i;
3397 if (ix86_section_threshold_string)
3399 i = atoi (ix86_section_threshold_string);
3401 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3403 ix86_section_threshold = i;
3406 if (ix86_tls_dialect_string)
3408 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU;
3410 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU2;
3413 error ("bad value (%s) for %stls-dialect=%s %s",
3414 ix86_tls_dialect_string, prefix, suffix, sw);
3417 if (ix87_precision_string)
3419 i = atoi (ix87_precision_string);
3420 if (i != 32 && i != 64 && i != 80)
3421 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3426 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3428 /* Enable by default the SSE and MMX builtins. Do allow the user to
3429 explicitly disable any of these. In particular, disabling SSE and
3430 MMX for kernel code is extremely useful. */
3431 if (!ix86_arch_specified)
3433 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3434 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3437 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3441 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3443 if (!ix86_arch_specified)
3445 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3447 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3448 when the programmer takes care to keep the stack from being destroyed. */
3449 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3450 target_flags |= MASK_NO_RED_ZONE;
3453 /* Keep nonleaf frame pointers. */
3454 if (flag_omit_frame_pointer)
3455 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3456 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3457 flag_omit_frame_pointer = 1;
3459 /* If we're doing fast math, we don't care about comparison order
3460 wrt NaNs. This lets us use a shorter comparison sequence. */
3461 if (flag_finite_math_only)
3462 target_flags &= ~MASK_IEEE_FP;
3464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3465 since the insns won't need emulation. */
3466 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3467 target_flags &= ~MASK_NO_FANCY_MATH_387;
3469 /* Likewise, if the target doesn't have a 387, or we've specified
3470 software floating point, don't use 387 inline intrinsics. */
3472 target_flags |= MASK_NO_FANCY_MATH_387;
3474 /* Turn on MMX builtins for -msse. */
3477 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3478 x86_prefetch_sse = true;
3481 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3482 if (TARGET_SSE4_2 || TARGET_ABM)
3483 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3485 /* Validate -mpreferred-stack-boundary= value or default it to
3486 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3487 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3488 if (ix86_preferred_stack_boundary_string)
3490 i = atoi (ix86_preferred_stack_boundary_string);
3491 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3492 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3493 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3495 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
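/* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, the 16-byte stack
   alignment that the x86-64 psABI requires at call sites.  */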
3498 /* Set the default value for -mstackrealign. */
3499 if (ix86_force_align_arg_pointer == -1)
3500 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3502 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3504 /* Validate -mincoming-stack-boundary= value or default it to
3505 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3506 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3507 if (ix86_incoming_stack_boundary_string)
3509 i = atoi (ix86_incoming_stack_boundary_string);
3510 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3511 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3512 i, TARGET_64BIT ? 4 : 2);
3515 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3516 ix86_incoming_stack_boundary
3517 = ix86_user_incoming_stack_boundary;
3521 /* Accept -msseregparm only if at least SSE support is enabled. */
3522 if (TARGET_SSEREGPARM
3524 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3526 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3527 if (ix86_fpmath_string != 0)
3529 if (! strcmp (ix86_fpmath_string, "387"))
3530 ix86_fpmath = FPMATH_387;
3531 else if (! strcmp (ix86_fpmath_string, "sse"))
3535 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3536 ix86_fpmath = FPMATH_387;
3539 ix86_fpmath = FPMATH_SSE;
3541 else if (! strcmp (ix86_fpmath_string, "387,sse")
3542 || ! strcmp (ix86_fpmath_string, "387+sse")
3543 || ! strcmp (ix86_fpmath_string, "sse,387")
3544 || ! strcmp (ix86_fpmath_string, "sse+387")
3545 || ! strcmp (ix86_fpmath_string, "both"))
3549 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3550 ix86_fpmath = FPMATH_387;
3552 else if (!TARGET_80387)
3554 warning (0, "387 instruction set disabled, using SSE arithmetic");
3555 ix86_fpmath = FPMATH_SSE;
3558 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3561 error ("bad value (%s) for %sfpmath=%s %s",
3562 ix86_fpmath_string, prefix, suffix, sw);
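/* Illustrative summary (not in the original source) of the -mfpmath=
   spellings accepted above:
     "387"                         -> FPMATH_387
     "sse"                         -> FPMATH_SSE (falls back to 387
                                      when SSE is disabled)
     "387,sse", "sse+387", "both",
     etc.                          -> FPMATH_SSE | FPMATH_387  */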
3565 /* If the i387 is disabled, then do not return values in it. */
3567 target_flags &= ~MASK_FLOAT_RETURNS;
3569 /* Use an external vectorized library for vectorizing intrinsics. */
3570 if (ix86_veclibabi_string)
3572 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_svml;
3574 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_acml;
3577 error ("unknown vectorization library ABI type (%s) for "
3578 "%sveclibabi=%s %s", ix86_veclibabi_string,
3579 prefix, suffix, sw);
3582 if ((!USE_IX86_FRAME_POINTER
3583 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3584 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3588 /* ??? Unwind info is not correct around the CFG unless either a frame
3589 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3590 unwind info generation to be aware of the CFG and propagating states around edges. */
3592 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3593 || flag_exceptions || flag_non_call_exceptions)
3594 && flag_omit_frame_pointer
3595 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3597 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3598 warning (0, "unwind tables currently require either a frame pointer "
3599 "or %saccumulate-outgoing-args%s for correctness",
3601 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3604 /* If stack probes are required, the space used for large function
3605 arguments on the stack must also be probed, so enable
3606 -maccumulate-outgoing-args so this happens in the prologue. */
3607 if (TARGET_STACK_PROBE
3608 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3610 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3611 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3612 "for correctness", prefix, suffix);
3613 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3616 /* For sane SSE instruction set generation we need fcomi instruction.
3617 It is safe to enable all CMOVE instructions. */
3621 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3624 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3625 p = strchr (internal_label_prefix, 'X');
3626 internal_label_prefix_len = p - internal_label_prefix;
3630 /* When the scheduling description is not available, disable the scheduler pass
3631 so it won't slow down compilation and make x87 code slower. */
3632 if (!TARGET_SCHEDULE)
3633 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3635 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3636 set_param_value ("simultaneous-prefetches",
3637 ix86_cost->simultaneous_prefetches);
3638 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3639 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3640 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3641 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3642 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3643 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3645 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3646 if (flag_prefetch_loop_arrays < 0
3649 && software_prefetching_beneficial_p ())
3650 flag_prefetch_loop_arrays = 1;
3652 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3653 can be optimized to ap = __builtin_next_arg (0). */
3654 if (!TARGET_64BIT && !flag_split_stack)
3655 targetm.expand_builtin_va_start = NULL;
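/* Illustrative sketch (assuming the 32-bit char *va_list typedef) of
   the optimization that clearing the hook above enables:

     va_list ap;
     __builtin_va_start (&ap, 0);   becomes   ap = __builtin_next_arg (0);

   i.e. a plain pointer assignment instead of a target-specific
   expansion.  */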
3659 ix86_gen_leave = gen_leave_rex64;
3660 ix86_gen_add3 = gen_adddi3;
3661 ix86_gen_sub3 = gen_subdi3;
3662 ix86_gen_sub3_carry = gen_subdi3_carry;
3663 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3664 ix86_gen_monitor = gen_sse3_monitor64;
3665 ix86_gen_andsp = gen_anddi3;
3666 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3667 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3668 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3672 ix86_gen_leave = gen_leave;
3673 ix86_gen_add3 = gen_addsi3;
3674 ix86_gen_sub3 = gen_subsi3;
3675 ix86_gen_sub3_carry = gen_subsi3_carry;
3676 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3677 ix86_gen_monitor = gen_sse3_monitor;
3678 ix86_gen_andsp = gen_andsi3;
3679 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3680 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3681 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
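/* Illustrative note (not in the original source): the ix86_gen_* hooks
   selected above let later code emit word-size operations without
   re-testing TARGET_64BIT, e.g.

     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-UNITS_PER_WORD)));

   goes through gen_adddi3 on 64-bit targets and gen_addsi3 on 32-bit
   ones.  */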
3685 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3687 target_flags |= MASK_CLD & ~target_flags_explicit;
3690 if (!TARGET_64BIT && flag_pic)
3692 if (flag_fentry > 0)
3693 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3696 if (flag_fentry < 0)
3698 #if defined(PROFILE_BEFORE_PROLOGUE)
3705 /* Save the initial options in case the user uses function-specific options. */
3707 target_option_default_node = target_option_current_node
3708 = build_target_option_node ();
3711 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3714 ix86_option_override (void)
3716 ix86_option_override_internal (true);
3719 /* Update register usage after having seen the compiler flags. */
3722 ix86_conditional_register_usage (void)
3727 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3729 if (fixed_regs[i] > 1)
3730 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3731 if (call_used_regs[i] > 1)
3732 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3735 /* The PIC register, if it exists, is fixed. */
3736 j = PIC_OFFSET_TABLE_REGNUM;
3737 if (j != INVALID_REGNUM)
3738 fixed_regs[j] = call_used_regs[j] = 1;
3740 /* The MS_ABI changes the set of call-used registers. */
3741 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3743 call_used_regs[SI_REG] = 0;
3744 call_used_regs[DI_REG] = 0;
3745 call_used_regs[XMM6_REG] = 0;
3746 call_used_regs[XMM7_REG] = 0;
3747 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3748 call_used_regs[i] = 0;
3751 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3752 other call-clobbered regs for 64-bit. */
3755 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3757 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3758 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3759 && call_used_regs[i])
3760 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
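/* Illustrative effect (assuming a 64-bit SYSV target): the loop above
   extends CLOBBERED_REGS beyond the 32-bit default of ax/cx/dx with
   the remaining call-clobbered general registers, e.g. si, di and
   r8-r11.  */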
3763 /* If MMX is disabled, squash the registers. */
3765 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3766 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3767 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3769 /* If SSE is disabled, squash the registers. */
3771 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3772 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3773 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3775 /* If the FPU is disabled, squash the registers. */
3776 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3777 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3778 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3779 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3781 /* If 32-bit, squash the 64-bit registers. */
3784 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3786 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3792 /* Save the current options */
3795 ix86_function_specific_save (struct cl_target_option *ptr)
3797 ptr->arch = ix86_arch;
3798 ptr->schedule = ix86_schedule;
3799 ptr->tune = ix86_tune;
3800 ptr->fpmath = ix86_fpmath;
3801 ptr->branch_cost = ix86_branch_cost;
3802 ptr->tune_defaulted = ix86_tune_defaulted;
3803 ptr->arch_specified = ix86_arch_specified;
3804 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3805 ptr->target_flags_explicit = target_flags_explicit;
3807 /* The fields are char but the variables are not; make sure the
3808 values fit in the fields. */
3809 gcc_assert (ptr->arch == ix86_arch);
3810 gcc_assert (ptr->schedule == ix86_schedule);
3811 gcc_assert (ptr->tune == ix86_tune);
3812 gcc_assert (ptr->fpmath == ix86_fpmath);
3813 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3816 /* Restore the current options */
3819 ix86_function_specific_restore (struct cl_target_option *ptr)
3821 enum processor_type old_tune = ix86_tune;
3822 enum processor_type old_arch = ix86_arch;
3823 unsigned int ix86_arch_mask, ix86_tune_mask;
3826 ix86_arch = (enum processor_type) ptr->arch;
3827 ix86_schedule = (enum attr_cpu) ptr->schedule;
3828 ix86_tune = (enum processor_type) ptr->tune;
3829 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3830 ix86_branch_cost = ptr->branch_cost;
3831 ix86_tune_defaulted = ptr->tune_defaulted;
3832 ix86_arch_specified = ptr->arch_specified;
3833 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3834 target_flags_explicit = ptr->target_flags_explicit;
3836 /* Recreate the arch feature tests if the arch changed */
3837 if (old_arch != ix86_arch)
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i]
3842 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3845 /* Recreate the tune optimization tests */
3846 if (old_tune != ix86_tune)
3848 ix86_tune_mask = 1u << ix86_tune;
3849 for (i = 0; i < X86_TUNE_LAST; ++i)
3850 ix86_tune_features[i]
3851 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
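/* Illustrative example of the tables rebuilt above: if ix86_tune
   selects processor N, then ix86_tune_mask == 1u << N, and a feature
   such as X86_TUNE_USE_LEAVE becomes nonzero exactly when its entry in
   initial_ix86_tune_features has bit N set.  */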
3855 /* Print the current options */
3858 ix86_function_specific_print (FILE *file, int indent,
3859 struct cl_target_option *ptr)
3862 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3863 NULL, NULL, NULL, false);
3865 fprintf (file, "%*sarch = %d (%s)\n",
3868 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3869 ? cpu_names[ptr->arch]
3872 fprintf (file, "%*stune = %d (%s)\n",
3875 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3876 ? cpu_names[ptr->tune]
3879 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3880 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3881 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3882 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3886 fprintf (file, "%*s%s\n", indent, "", target_string);
3887 free (target_string);
3892 /* Inner function to process the attribute((target(...))); take an argument and
3893 set the current options from the argument. If we have a list, recursively go over the list. */
3897 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3902 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3903 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3904 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3905 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3920 enum ix86_opt_type type;
3925 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3926 IX86_ATTR_ISA ("abm", OPT_mabm),
3927 IX86_ATTR_ISA ("aes", OPT_maes),
3928 IX86_ATTR_ISA ("avx", OPT_mavx),
3929 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3930 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3931 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3932 IX86_ATTR_ISA ("sse", OPT_msse),
3933 IX86_ATTR_ISA ("sse2", OPT_msse2),
3934 IX86_ATTR_ISA ("sse3", OPT_msse3),
3935 IX86_ATTR_ISA ("sse4", OPT_msse4),
3936 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3937 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3938 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3939 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3940 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3941 IX86_ATTR_ISA ("xop", OPT_mxop),
3942 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3943 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3944 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3945 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3947 /* string options */
3948 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3949 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3950 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3953 IX86_ATTR_YES ("cld",
3957 IX86_ATTR_NO ("fancy-math-387",
3958 OPT_mfancy_math_387,
3959 MASK_NO_FANCY_MATH_387),
3961 IX86_ATTR_YES ("ieee-fp",
3965 IX86_ATTR_YES ("inline-all-stringops",
3966 OPT_minline_all_stringops,
3967 MASK_INLINE_ALL_STRINGOPS),
3969 IX86_ATTR_YES ("inline-stringops-dynamically",
3970 OPT_minline_stringops_dynamically,
3971 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3973 IX86_ATTR_NO ("align-stringops",
3974 OPT_mno_align_stringops,
3975 MASK_NO_ALIGN_STRINGOPS),
3977 IX86_ATTR_YES ("recip",
3983 /* If this is a list, recurse to get the options. */
3984 if (TREE_CODE (args) == TREE_LIST)
3988 for (; args; args = TREE_CHAIN (args))
3989 if (TREE_VALUE (args)
3990 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3996 else if (TREE_CODE (args) != STRING_CST)
3999 /* Handle multiple arguments separated by commas. */
4000 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4002 while (next_optstr && *next_optstr != '\0')
4004 char *p = next_optstr;
4006 char *comma = strchr (next_optstr, ',');
4007 const char *opt_string;
4008 size_t len, opt_len;
4013 enum ix86_opt_type type = ix86_opt_unknown;
4019 len = comma - next_optstr;
4020 next_optstr = comma + 1;
4028 /* Recognize no-xxx. */
4029 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4038 /* Find the option. */
4041 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4043 type = attrs[i].type;
4044 opt_len = attrs[i].len;
4045 if (ch == attrs[i].string[0]
4046 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4047 && memcmp (p, attrs[i].string, opt_len) == 0)
4050 mask = attrs[i].mask;
4051 opt_string = attrs[i].string;
4056 /* Process the option. */
4059 error ("attribute(target(\"%s\")) is unknown", orig_p);
4063 else if (type == ix86_opt_isa)
4064 ix86_handle_option (opt, p, opt_set_p);
4066 else if (type == ix86_opt_yes || type == ix86_opt_no)
4068 if (type == ix86_opt_no)
4069 opt_set_p = !opt_set_p;
4072 target_flags |= mask;
4074 target_flags &= ~mask;
4077 else if (type == ix86_opt_str)
4081 error ("option(\"%s\") was already specified", opt_string);
4085 p_strings[opt] = xstrdup (p + opt_len);
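/* Illustrative usage (not in the original source) of the parser above:

     __attribute__((target ("sse4.2,no-fma4,arch=core2")))
     int foo (void);

   "sse4.2" and "no-fma4" match ix86_opt_isa entries (the "no-" prefix
   flips opt_set_p), while "arch=core2" matches an ix86_opt_str entry
   and is saved into p_strings[IX86_FUNCTION_SPECIFIC_ARCH].  */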
4095 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4098 ix86_valid_target_attribute_tree (tree args)
4100 const char *orig_arch_string = ix86_arch_string;
4101 const char *orig_tune_string = ix86_tune_string;
4102 const char *orig_fpmath_string = ix86_fpmath_string;
4103 int orig_tune_defaulted = ix86_tune_defaulted;
4104 int orig_arch_specified = ix86_arch_specified;
4105 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4108 struct cl_target_option *def
4109 = TREE_TARGET_OPTION (target_option_default_node);
4111 /* Process each of the options on the chain. */
4112 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4115 /* If the changed options are different from the default, rerun
4116 ix86_option_override_internal, and then save the options away.
4117 The string options are attribute options, and will be undone
4118 when we copy the save structure. */
4119 if (ix86_isa_flags != def->x_ix86_isa_flags
4120 || target_flags != def->x_target_flags
4121 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4122 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4123 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4125 /* If we are using the default tune= or arch=, undo the string assigned,
4126 and use the default. */
4127 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4128 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4129 else if (!orig_arch_specified)
4130 ix86_arch_string = NULL;
4132 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4133 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4134 else if (orig_tune_defaulted)
4135 ix86_tune_string = NULL;
4137 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4138 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4139 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4140 else if (!TARGET_64BIT && TARGET_SSE)
4141 ix86_fpmath_string = "sse,387";
4143 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4144 ix86_option_override_internal (false);
4146 /* Add any builtin functions for the new ISA, if any. */
4147 ix86_add_new_builtins (ix86_isa_flags);
4149 /* Save the current options unless we are validating options for the attribute. */
4151 t = build_target_option_node ();
4153 ix86_arch_string = orig_arch_string;
4154 ix86_tune_string = orig_tune_string;
4155 ix86_fpmath_string = orig_fpmath_string;
4157 /* Free up memory allocated to hold the strings */
4158 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4159 if (option_strings[i])
4160 free (option_strings[i]);
4166 /* Hook to validate attribute((target("string"))). */
4169 ix86_valid_target_attribute_p (tree fndecl,
4170 tree ARG_UNUSED (name),
4172 int ARG_UNUSED (flags))
4174 struct cl_target_option cur_target;
4176 tree old_optimize = build_optimization_node ();
4177 tree new_target, new_optimize;
4178 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4180 /* If the function changed the optimization levels as well as setting target
4181 options, start with the optimizations specified. */
4182 if (func_optimize && func_optimize != old_optimize)
4183 cl_optimization_restore (&global_options,
4184 TREE_OPTIMIZATION (func_optimize));
4186 /* The target attributes may also change some optimization flags, so update
4187 the optimization options if necessary. */
4188 cl_target_option_save (&cur_target, &global_options);
4189 new_target = ix86_valid_target_attribute_tree (args);
4190 new_optimize = build_optimization_node ();
4197 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4199 if (old_optimize != new_optimize)
4200 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4203 cl_target_option_restore (&global_options, &cur_target);
4205 if (old_optimize != new_optimize)
4206 cl_optimization_restore (&global_options,
4207 TREE_OPTIMIZATION (old_optimize));
4213 /* Hook to determine if one function can safely inline another. */
4216 ix86_can_inline_p (tree caller, tree callee)
4219 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4220 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4222 /* If callee has no option attributes, then it is ok to inline. */
4226 /* If caller has no option attributes, but callee does, then it is not ok to inline. */
4228 else if (!caller_tree)
4233 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4234 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4236 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4237 function can inline an SSE2 function, but not vice versa.
4239 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4240 != callee_opts->x_ix86_isa_flags)
4243 /* See if we have the same non-isa options. */
4244 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4247 /* See if arch, tune, etc. are the same. */
4248 else if (caller_opts->arch != callee_opts->arch)
4251 else if (caller_opts->tune != callee_opts->tune)
4254 else if (caller_opts->fpmath != callee_opts->fpmath)
4257 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4268 /* Remember the last target of ix86_set_current_function. */
4269 static GTY(()) tree ix86_previous_fndecl;
4271 /* Establish appropriate back-end context for processing the function
4272 FNDECL. The argument might be NULL to indicate processing at top
4273 level, outside of any function scope. */
4275 ix86_set_current_function (tree fndecl)
4277 /* Only change the context if the function changes. This hook is called
4278 several times in the course of compiling a function, and we don't want to
4279 slow things down too much or call target_reinit when it isn't safe. */
4280 if (fndecl && fndecl != ix86_previous_fndecl)
4282 tree old_tree = (ix86_previous_fndecl
4283 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4286 tree new_tree = (fndecl
4287 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4290 ix86_previous_fndecl = fndecl;
4291 if (old_tree == new_tree)
4296 cl_target_option_restore (&global_options,
4297 TREE_TARGET_OPTION (new_tree));
4303 struct cl_target_option *def
4304 = TREE_TARGET_OPTION (target_option_current_node);
4306 cl_target_option_restore (&global_options, def);
4313 /* Return true if this goes in large data/bss. */
4316 ix86_in_large_data_p (tree exp)
4318 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4321 /* Functions are never large data. */
4322 if (TREE_CODE (exp) == FUNCTION_DECL)
4325 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4327 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4328 if (strcmp (section, ".ldata") == 0
4329 || strcmp (section, ".lbss") == 0)
4335 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4337 /* If this is an incomplete type with size 0, then we can't put it
4338 in data because it might be too big when completed. */
4339 if (!size || size > ix86_section_threshold)
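/* Illustrative note (assuming the usual 65536-byte default for
   -mlarge-data-threshold=): under -mcmodel=medium a 1 MB array fails
   the size test above and is therefore placed in .ldata/.lbss.  */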
4346 /* Switch to the appropriate section for output of DECL.
4347 DECL is either a `VAR_DECL' node or a constant of some sort.
4348 RELOC indicates whether forming the initial value of DECL requires
4349 link-time relocations. */
4351 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4355 x86_64_elf_select_section (tree decl, int reloc,
4356 unsigned HOST_WIDE_INT align)
4358 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4359 && ix86_in_large_data_p (decl))
4361 const char *sname = NULL;
4362 unsigned int flags = SECTION_WRITE;
4363 switch (categorize_decl_for_section (decl, reloc))
4368 case SECCAT_DATA_REL:
4369 sname = ".ldata.rel";
4371 case SECCAT_DATA_REL_LOCAL:
4372 sname = ".ldata.rel.local";
4374 case SECCAT_DATA_REL_RO:
4375 sname = ".ldata.rel.ro";
4377 case SECCAT_DATA_REL_RO_LOCAL:
4378 sname = ".ldata.rel.ro.local";
4382 flags |= SECTION_BSS;
4385 case SECCAT_RODATA_MERGE_STR:
4386 case SECCAT_RODATA_MERGE_STR_INIT:
4387 case SECCAT_RODATA_MERGE_CONST:
4391 case SECCAT_SRODATA:
4398 /* We don't split these for medium model. Place them into
4399 default sections and hope for the best. */
4404 /* We might get called with string constants, but get_named_section
4405 doesn't like them as they are not DECLs. Also, we need to set
4406 flags in that case. */
4408 return get_section (sname, flags, NULL);
4409 return get_named_section (decl, sname, reloc);
4412 return default_elf_select_section (decl, reloc, align);
4415 /* Build up a unique section name, expressed as a
4416 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4417 RELOC indicates whether the initial value of EXP requires
4418 link-time relocations. */
4420 static void ATTRIBUTE_UNUSED
4421 x86_64_elf_unique_section (tree decl, int reloc)
4423 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4424 && ix86_in_large_data_p (decl))
4426 const char *prefix = NULL;
4427 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4428 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4430 switch (categorize_decl_for_section (decl, reloc))
4433 case SECCAT_DATA_REL:
4434 case SECCAT_DATA_REL_LOCAL:
4435 case SECCAT_DATA_REL_RO:
4436 case SECCAT_DATA_REL_RO_LOCAL:
4437 prefix = one_only ? ".ld" : ".ldata";
4440 prefix = one_only ? ".lb" : ".lbss";
4443 case SECCAT_RODATA_MERGE_STR:
4444 case SECCAT_RODATA_MERGE_STR_INIT:
4445 case SECCAT_RODATA_MERGE_CONST:
4446 prefix = one_only ? ".lr" : ".lrodata";
4448 case SECCAT_SRODATA:
4455 /* We don't split these for medium model. Place them into
4456 default sections and hope for the best. */
4461 const char *name, *linkonce;
4464 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4465 name = targetm.strip_name_encoding (name);
4467 /* If we're using one_only, then there needs to be a .gnu.linkonce
4468 prefix to the section name. */
4469 linkonce = one_only ? ".gnu.linkonce" : "";
4471 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4473 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4477 default_unique_section (decl, reloc);
4480 #ifdef COMMON_ASM_OP
4481 /* This says how to output assembler code to declare an
4482 uninitialized external linkage data object.
4484 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
4487 x86_elf_aligned_common (FILE *file,
4488 const char *name, unsigned HOST_WIDE_INT size,
4491 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4492 && size > (unsigned int)ix86_section_threshold)
4493 fputs (".largecomm\t", file);
4495 fputs (COMMON_ASM_OP, file);
4496 assemble_name (file, name);
4497 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4498 size, align / BITS_PER_UNIT);
4502 /* Utility function for targets to use in implementing
4503 ASM_OUTPUT_ALIGNED_BSS. */
4506 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4507 const char *name, unsigned HOST_WIDE_INT size,
4510 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4511 && size > (unsigned int)ix86_section_threshold)
4512 switch_to_section (get_named_section (decl, ".lbss", 0));
4514 switch_to_section (bss_section);
4515 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4516 #ifdef ASM_DECLARE_OBJECT_NAME
4517 last_assemble_variable_decl = decl;
4518 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4520 /* The standard thing is to just output a label for the object. */
4521 ASM_OUTPUT_LABEL (file, name);
4522 #endif /* ASM_DECLARE_OBJECT_NAME */
4523 ASM_OUTPUT_SKIP (file, size ? size : 1);
4527 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4529 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4530 make the problem with not enough registers even worse. */
4531 #ifdef INSN_SCHEDULING
4533 flag_schedule_insns = 0;
4537 /* The Darwin libraries never set errno, so we might as well
4538 avoid calling them when that's the only reason we would. */
4539 flag_errno_math = 0;
4541 /* The default values of these switches depend on TARGET_64BIT,
4542 which is not known at this moment. Mark these values with 2 and
4543 let the user override them. In case there is no command-line
4544 option specifying them, we will set the defaults in
4545 ix86_option_override_internal. */
4547 flag_omit_frame_pointer = 2;
4549 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4553 flag_pcc_struct_return = 2;
4554 flag_asynchronous_unwind_tables = 2;
4555 flag_vect_cost_model = 1;
4556 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4557 SUBTARGET_OPTIMIZATION_OPTIONS;
4561 /* Decide whether we must probe the stack before any space allocation
4562 on this target. It's essentially TARGET_STACK_PROBE except when
4563 -fstack-check causes the stack to be already probed differently. */
4566 ix86_target_stack_probe (void)
4568 /* Do not probe the stack twice if static stack checking is enabled. */
4569 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4572 return TARGET_STACK_PROBE;
4575 /* Decide whether we can make a sibling call to a function. DECL is the
4576 declaration of the function being targeted by the call and EXP is the
4577 CALL_EXPR representing the call. */
4580 ix86_function_ok_for_sibcall (tree decl, tree exp)
4582 tree type, decl_or_type;
4585 /* If we are generating position-independent code, we cannot sibcall
4586 optimize any indirect call, or a direct call to a global function,
4587 as the PLT requires %ebx be live. */
4588 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4591 /* If we need to align the outgoing stack, then sibcalling would
4592 unalign the stack, which may break the called function. */
4593 if (ix86_minimum_incoming_stack_boundary (true)
4594 < PREFERRED_STACK_BOUNDARY)
4599 decl_or_type = decl;
4600 type = TREE_TYPE (decl);
4604 /* We're looking at the CALL_EXPR, we need the type of the function. */
4605 type = CALL_EXPR_FN (exp); /* pointer expression */
4606 type = TREE_TYPE (type); /* pointer type */
4607 type = TREE_TYPE (type); /* function type */
4608 decl_or_type = type;
4611 /* Check that the return value locations are the same. For example,
4612 if we are returning floats on the 80387 register stack, we cannot
4613 make a sibcall from a function that doesn't return a float to a
4614 function that does or, conversely, from a function that does return
4615 a float to a function that doesn't; the necessary stack adjustment
4616 would not be executed. This is also the place we notice
4617 differences in the return value ABI. Note that it is ok for one
4618 of the functions to have void return type as long as the return
4619 value of the other is passed in a register. */
4620 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4621 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4623 if (STACK_REG_P (a) || STACK_REG_P (b))
4625 if (!rtx_equal_p (a, b))
4628 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4630 else if (!rtx_equal_p (a, b))
4635 /* The SYSV ABI has more call-clobbered registers;
4636 disallow sibcalls from MS to SYSV. */
4637 if (cfun->machine->call_abi == MS_ABI
4638 && ix86_function_type_abi (type) == SYSV_ABI)
4643 /* If this call is indirect, we'll need to be able to use a
4644 call-clobbered register for the address of the target function.
4645 Make sure that all such registers are not used for passing
4646 parameters. Note that DLLIMPORT functions are indirect. */
4648 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4650 if (ix86_function_regparm (type, NULL) >= 3)
4652 /* ??? Need to count the actual number of registers to be used,
4653 not the possible number of registers. Fix later. */
4659 /* Otherwise okay. That also includes certain types of indirect calls. */
4663 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4664 and "sseregparm" calling convention attributes;
4665 arguments as in struct attribute_spec.handler. */
4668 ix86_handle_cconv_attribute (tree *node, tree name,
4670 int flags ATTRIBUTE_UNUSED,
4673 if (TREE_CODE (*node) != FUNCTION_TYPE
4674 && TREE_CODE (*node) != METHOD_TYPE
4675 && TREE_CODE (*node) != FIELD_DECL
4676 && TREE_CODE (*node) != TYPE_DECL)
4678 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4680 *no_add_attrs = true;
4684 /* Can combine regparm with all attributes but fastcall. */
4685 if (is_attribute_p ("regparm", name))
4689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4691 error ("fastcall and regparm attributes are not compatible");
4694 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4696 error ("regparm and thiscall attributes are not compatible");
4699 cst = TREE_VALUE (args);
4700 if (TREE_CODE (cst) != INTEGER_CST)
4702 warning (OPT_Wattributes,
4703 "%qE attribute requires an integer constant argument",
4705 *no_add_attrs = true;
4707 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4709 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4711 *no_add_attrs = true;
4719 /* Do not warn when emulating the MS ABI. */
4720 if ((TREE_CODE (*node) != FUNCTION_TYPE
4721 && TREE_CODE (*node) != METHOD_TYPE)
4722 || ix86_function_type_abi (*node) != MS_ABI)
4723 warning (OPT_Wattributes, "%qE attribute ignored",
4725 *no_add_attrs = true;
4729 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4730 if (is_attribute_p ("fastcall", name))
4732 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4734 error ("fastcall and cdecl attributes are not compatible");
4736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4738 error ("fastcall and stdcall attributes are not compatible");
4740 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4742 error ("fastcall and regparm attributes are not compatible");
4744 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4746 error ("fastcall and thiscall attributes are not compatible");
4750 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4752 else if (is_attribute_p ("stdcall", name))
4754 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4756 error ("stdcall and cdecl attributes are not compatible");
4758 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4760 error ("stdcall and fastcall attributes are not compatible");
4762 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4764 error ("stdcall and thiscall attributes are not compatible");
4768 /* Can combine cdecl with regparm and sseregparm. */
4769 else if (is_attribute_p ("cdecl", name))
4771 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4773 error ("stdcall and cdecl attributes are not compatible");
4775 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4777 error ("fastcall and cdecl attributes are not compatible");
4779 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4781 error ("cdecl and thiscall attributes are not compatible");
4784 else if (is_attribute_p ("thiscall", name))
4786 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4787 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4789 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4791 error ("stdcall and thiscall attributes are not compatible");
4793 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4795 error ("fastcall and thiscall attributes are not compatible");
4797 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4799 error ("cdecl and thiscall attributes are not compatible");
4803 /* Can combine sseregparm with all attributes. */
4808 /* Return 0 if the attributes for two types are incompatible, 1 if they
4809 are compatible, and 2 if they are nearly compatible (which causes a
4810 warning to be generated). */
4813 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4815 /* Check for mismatch of non-default calling convention. */
4816 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4818 if (TREE_CODE (type1) != FUNCTION_TYPE
4819 && TREE_CODE (type1) != METHOD_TYPE)
4822 /* Check for mismatched fastcall/regparm types. */
4823 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4824 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4825 || (ix86_function_regparm (type1, NULL)
4826 != ix86_function_regparm (type2, NULL)))
4829 /* Check for mismatched sseregparm types. */
4830 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4831 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4834 /* Check for mismatched thiscall types. */
4835 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4836 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4839 /* Check for mismatched return types (cdecl vs stdcall). */
4840 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4841 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
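/* Illustrative example (not in the original source): a function type
   declared stdcall and an otherwise identical plain (cdecl) type
   compare as incompatible here, because exactly one of them carries
   the attribute named by rtdstr above.  */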
4847 /* Return the regparm value for a function with the indicated TYPE and DECL.
4848 DECL may be NULL when calling function indirectly
4849 or considering a libcall. */
4852 ix86_function_regparm (const_tree type, const_tree decl)
4858 return (ix86_function_type_abi (type) == SYSV_ABI
4859 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4861 regparm = ix86_regparm;
4862 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4865 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4869 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4872 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4875 /* Use register calling convention for local functions when possible. */
4877 && TREE_CODE (decl) == FUNCTION_DECL
4879 && !(profile_flag && !flag_fentry))
4881 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4882 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4885 int local_regparm, globals = 0, regno;
4887 /* Make sure no regparm register is taken by a
4888 fixed register variable. */
4889 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4890 if (fixed_regs[local_regparm])
4893 /* We don't want to use regparm(3) for nested functions as
4894 these use a static chain pointer in the third argument. */
4895 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4898 /* In 32-bit mode save a register for the split stack. */
4899 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4902 /* Each fixed register usage increases register pressure,
4903 so fewer registers should be used for argument passing.
4904 This functionality can be overridden by an explicit regparm value. */
4906 for (regno = 0; regno <= DI_REG; regno++)
4907 if (fixed_regs[regno])
4911 = globals < local_regparm ? local_regparm - globals : 0;
4913 if (local_regparm > regparm)
4914 regparm = local_regparm;
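/* Illustrative usage of the attribute handled above:

     __attribute__((regparm (3))) int f (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx instead of on the stack.  */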
4921 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4922 DFmode (2) arguments in SSE registers for a function with the
4923 indicated TYPE and DECL. DECL may be NULL when calling function
4924 indirectly or considering a libcall. Otherwise return 0. */
4927 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4929 gcc_assert (!TARGET_64BIT);
4931 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4932 by the sseregparm attribute. */
4933 if (TARGET_SSEREGPARM
4934 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4941 error ("calling %qD with attribute sseregparm without "
4942 "SSE/SSE2 enabled", decl);
4944 error ("calling %qT with attribute sseregparm without "
4945 "SSE/SSE2 enabled", type);
4953 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4954 (and DFmode for SSE2) arguments in SSE registers. */
4955 if (decl && TARGET_SSE_MATH && optimize
4956 && !(profile_flag && !flag_fentry))
4958 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4959 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4961 return TARGET_SSE2 ? 2 : 1;
4967 /* Return true if EAX is live at the start of the function. Used by
4968 ix86_expand_prologue to determine if we need special help before
4969 calling allocate_stack_worker. */
4972 ix86_eax_live_at_start_p (void)
4974 /* Cheat. Don't bother working forward from ix86_function_regparm
4975 to the function type to whether an actual argument is located in
4976 eax. Instead just look at cfg info, which is still close enough
4977 to correct at this point. This gives false positives for broken
4978 functions that might use uninitialized data that happens to be
4979 allocated in eax, but who cares? */
4980 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4983 /* Value is the number of bytes of arguments automatically
4984 popped when returning from a subroutine call.
4985 FUNDECL is the declaration node of the function (as a tree),
4986 FUNTYPE is the data type of the function (as a tree),
4987 or for a library call it is an identifier node for the subroutine name.
4988 SIZE is the number of bytes of arguments passed on the stack.
4990 On the 80386, the RTD insn may be used to pop them if the number
4991 of args is fixed, but if the number is variable then the caller
4992 must pop them all. RTD can't be used for library calls now
4993 because the library is compiled with the Unix compiler.
4994 Use of RTD is a selectable option, since it is incompatible with
4995 standard Unix calling sequences. If the option is not selected,
4996 the caller must always pop the args.
4998 The attribute stdcall is equivalent to RTD on a per module basis. */
5001 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5005 /* None of the 64-bit ABIs pop arguments. */
5009 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5011 /* Cdecl functions override -mrtd, and never pop the stack. */
5012 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5014 /* Stdcall and fastcall functions will pop the stack if they take no variable arguments. */
5016 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5017 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5018 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5021 if (rtd && ! stdarg_p (funtype))
5025 /* Lose any fake structure return argument if it is passed on the stack. */
5026 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5027 && !KEEP_AGGREGATE_RETURN_POINTER)
5029 int nregs = ix86_function_regparm (funtype, fundecl);
5031 return GET_MODE_SIZE (Pmode);
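/* Worked example (illustrative): for

     __attribute__((stdcall)) void g (int a, int b);

   SIZE is 8 on ia32 and the callee pops those 8 bytes itself (via
   "ret $8"), which is what the stdcall attribute lookup above
   arranges.  */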
5037 /* Argument support functions. */
5039 /* Return true when register may be used to pass function parameters. */
5041 ix86_function_arg_regno_p (int regno)
5044 const int *parm_regs;
5049 return (regno < REGPARM_MAX
5050 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5052 return (regno < REGPARM_MAX
5053 || (TARGET_MMX && MMX_REGNO_P (regno)
5054 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5055 || (TARGET_SSE && SSE_REGNO_P (regno)
5056 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5061 if (SSE_REGNO_P (regno) && TARGET_SSE)
5066 if (TARGET_SSE && SSE_REGNO_P (regno)
5067 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5071 /* TODO: The function should depend on current function ABI but
5072 builtins.c would need updating then. Therefore we use the default ABI. */
5075 /* RAX is used as hidden argument to va_arg functions. */
5076 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5079 if (ix86_abi == MS_ABI)
5080 parm_regs = x86_64_ms_abi_int_parameter_registers;
5082 parm_regs = x86_64_int_parameter_registers;
5083 for (i = 0; i < (ix86_abi == MS_ABI
5084 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5085 if (regno == parm_regs[i])
5090 /* Return if we do not know how to pass TYPE solely in registers. */
5093 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5095 if (must_pass_in_stack_var_size_or_pad (mode, type))
5098 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5099 The layout_type routine is crafty and tries to trick us into passing
5100 currently unsupported vector types on the stack by using TImode. */
5101 return (!TARGET_64BIT && mode == TImode
5102 && type && TREE_CODE (type) != VECTOR_TYPE);
5105 /* It returns the size, in bytes, of the area reserved for arguments passed
5106 in registers for the function represented by FNDECL, depending on the ABI used. */
5109 ix86_reg_parm_stack_space (const_tree fndecl)
5111 enum calling_abi call_abi = SYSV_ABI;
5112 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5113 call_abi = ix86_function_abi (fndecl);
5115 call_abi = ix86_function_type_abi (fndecl);
5116 if (call_abi == MS_ABI)
5121 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the call ABI used. */
5124 ix86_function_type_abi (const_tree fntype)
5126 if (TARGET_64BIT && fntype != NULL)
5128 enum calling_abi abi = ix86_abi;
5129 if (abi == SYSV_ABI)
5131 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5134 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5142 ix86_function_ms_hook_prologue (const_tree fn)
5144 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5146 if (decl_function_context (fn) != NULL_TREE)
5147 error_at (DECL_SOURCE_LOCATION (fn),
5148 "ms_hook_prologue is not compatible with nested function");
5155 static enum calling_abi
5156 ix86_function_abi (const_tree fndecl)
5160 return ix86_function_type_abi (TREE_TYPE (fndecl));
5163 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the call ABI used. */
5166 ix86_cfun_abi (void)
5168 if (! cfun || ! TARGET_64BIT)
5170 return cfun->machine->call_abi;
5173 /* Write the extra assembler code needed to declare a function properly. */
5176 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5179 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5183 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5184 unsigned int filler_cc = 0xcccccccc;
5186 for (i = 0; i < filler_count; i += 4)
5187 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5190 ASM_OUTPUT_LABEL (asm_out_file, fname);
5192 /* Output magic byte marker, if hot-patch attribute is set. */
5197 /* leaq [%rsp + 0], %rsp */
5198 asm_fprintf (asm_out_file, ASM_BYTE
5199 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5203 /* movl.s %edi, %edi
5204 push %ebp
5205 movl.s %esp, %ebp */
5206 asm_fprintf (asm_out_file, ASM_BYTE
5207 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5213 extern void init_regs (void);
5215 /* Implementation of the call ABI switching target hook. The call register
5216 sets specific to FNDECL are set up. See also CONDITIONAL_REGISTER_USAGE
5217 for more details. */
5219 ix86_call_abi_override (const_tree fndecl)
5221 if (fndecl == NULL_TREE)
5222 cfun->machine->call_abi = ix86_abi;
5224 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5227 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
5228 expensive re-initialization via init_regs each time we switch function
5229 context, since this is needed only during RTL expansion. */
5231 ix86_maybe_switch_abi (void)
5234 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5238 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5239 for a call to a function whose data type is FNTYPE.
5240 For a library call, FNTYPE is 0. */
5243 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5244 tree fntype, /* tree ptr for function decl */
5245 rtx libname, /* SYMBOL_REF of library name or 0 */
5248 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5249 memset (cum, 0, sizeof (*cum));
5252 cum->call_abi = ix86_function_abi (fndecl);
5254 cum->call_abi = ix86_function_type_abi (fntype);
5255 /* Set up the number of registers to use for passing arguments. */
5257 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5258 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5259 "or subtarget optimization implying it");
5260 cum->nregs = ix86_regparm;
5263 cum->nregs = (cum->call_abi == SYSV_ABI
5264 ? X86_64_REGPARM_MAX
5265 : X86_64_MS_REGPARM_MAX);
5269 cum->sse_nregs = SSE_REGPARM_MAX;
5272 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5273 ? X86_64_SSE_REGPARM_MAX
5274 : X86_64_MS_SSE_REGPARM_MAX);
5278 cum->mmx_nregs = MMX_REGPARM_MAX;
5279 cum->warn_avx = true;
5280 cum->warn_sse = true;
5281 cum->warn_mmx = true;
5283 /* Because the type might mismatch between caller and callee, we need to
5284 use the actual type of the function for local calls.
5285 FIXME: cgraph_analyze can be told to actually record if the function uses
5286 va_start, so for local functions maybe_vaarg can be made more aggressive.
5288 FIXME: once the type system is fixed, we won't need this code anymore. */
5290 fntype = TREE_TYPE (fndecl);
5291 cum->maybe_vaarg = (fntype
5292 ? (!prototype_p (fntype) || stdarg_p (fntype))
5297 /* If there are variable arguments, then we won't pass anything
5298 in registers in 32-bit mode. */
5299 if (stdarg_p (fntype))
5310 /* Use ecx and edx registers if function has fastcall attribute,
5311 else look for regparm information. */
5314 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5317 cum->fastcall = 1; /* Same first register as in fastcall. */
5319 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5325 cum->nregs = ix86_function_regparm (fntype, fndecl);
5328 /* Set up the number of SSE registers used for passing SFmode
5329 and DFmode arguments. Warn for mismatching ABI. */
5330 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5334 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5335 But in the case of vector types, it is some vector mode.
5337 When we have only some of our vector isa extensions enabled, then there
5338 are some modes for which vector_mode_supported_p is false. For these
5339 modes, the generic vector support in gcc will choose some non-vector mode
5340 in order to implement the type. By computing the natural mode, we'll
5341 select the proper ABI location for the operand and not depend on whatever
5342 the middle-end decides to do with these vector types.
5344 The middle-end can't deal with vector types > 16 bytes. In this
5345 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5348 static enum machine_mode
5349 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5351 enum machine_mode mode = TYPE_MODE (type);
5353 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5355 HOST_WIDE_INT size = int_size_in_bytes (type);
5356 if ((size == 8 || size == 16 || size == 32)
5357 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5358 && TYPE_VECTOR_SUBPARTS (type) > 1)
5360 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5362 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5363 mode = MIN_MODE_VECTOR_FLOAT;
5365 mode = MIN_MODE_VECTOR_INT;
5367 /* Get the mode which has this inner mode and number of units. */
5368 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5369 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5370 && GET_MODE_INNER (mode) == innermode)
5372 if (size == 32 && !TARGET_AVX)
5374 static bool warnedavx;
5381 warning (0, "AVX vector argument without AVX "
5382 "enabled changes the ABI");
5384 return TYPE_MODE (type);
5397 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5398 this may not agree with the mode that the type system has chosen for the
5399 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5400 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5403 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5408 if (orig_mode != BLKmode)
5409 tmp = gen_rtx_REG (orig_mode, regno);
5412 tmp = gen_rtx_REG (mode, regno);
5413 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5414 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5420 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5421 of this code is to classify each 8 bytes of an incoming argument by the register
5422 class and assign registers accordingly. */
5424 /* Return the union class of CLASS1 and CLASS2.
5425 See the x86-64 PS ABI for details. */
5427 static enum x86_64_reg_class
5428 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5430 /* Rule #1: If both classes are equal, this is the resulting class. */
5431 if (class1 == class2)
5434 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5436 if (class1 == X86_64_NO_CLASS)
5438 if (class2 == X86_64_NO_CLASS)
5441 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5442 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5443 return X86_64_MEMORY_CLASS;
5445 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5446 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5447 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5448 return X86_64_INTEGERSI_CLASS;
5449 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5450 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5451 return X86_64_INTEGER_CLASS;
5453 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
5455 if (class1 == X86_64_X87_CLASS
5456 || class1 == X86_64_X87UP_CLASS
5457 || class1 == X86_64_COMPLEX_X87_CLASS
5458 || class2 == X86_64_X87_CLASS
5459 || class2 == X86_64_X87UP_CLASS
5460 || class2 == X86_64_COMPLEX_X87_CLASS)
5461 return X86_64_MEMORY_CLASS;
5463 /* Rule #6: Otherwise class SSE is used. */
5464 return X86_64_SSE_CLASS;
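/* Illustrative application of the rules above: merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS gives
   X86_64_INTEGERSI_CLASS (rule #4), merging anything with
   X86_64_MEMORY_CLASS gives MEMORY (rule #3), and merging a class
   with X86_64_NO_CLASS leaves it unchanged (rule #2).  */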
5467 /* Classify the argument of type TYPE and mode MODE.
5468 CLASSES will be filled by the register class used to pass each word
5469 of the operand. The number of words is returned. In case the parameter
5470 should be passed in memory, 0 is returned. As a special case for zero
5471 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5473 BIT_OFFSET is used internally for handling records and specifies the
5474 offset in bits modulo 256 to avoid overflow cases.
5476 See the x86-64 PS ABI for details.
5480 classify_argument (enum machine_mode mode, const_tree type,
5481 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5483 HOST_WIDE_INT bytes =
5484 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5485 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5487 /* Variable sized entities are always passed/returned in memory. */
5491 if (mode != VOIDmode
5492 && targetm.calls.must_pass_in_stack (mode, type))
5495 if (type && AGGREGATE_TYPE_P (type))
5499 enum x86_64_reg_class subclasses[MAX_CLASSES];
5501 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5505 for (i = 0; i < words; i++)
5506 classes[i] = X86_64_NO_CLASS;
5508 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5509 signal the memory class, so handle it as a special case. */
5512 classes[0] = X86_64_NO_CLASS;
5516 /* Classify each field of record and merge classes. */
5517 switch (TREE_CODE (type))
5520 /* And now merge the fields of structure. */
5521 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5523 if (TREE_CODE (field) == FIELD_DECL)
5527 if (TREE_TYPE (field) == error_mark_node)
5530 /* Bitfields are always classified as integer. Handle them
5531 early, since later code would consider them to be
5532 misaligned integers. */
5533 if (DECL_BIT_FIELD (field))
5535 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5536 i < ((int_bit_position (field) + (bit_offset % 64))
5537 + tree_low_cst (DECL_SIZE (field), 0)
5540 merge_classes (X86_64_INTEGER_CLASS,
5547 type = TREE_TYPE (field);
5549 /* Flexible array member is ignored. */
5550 if (TYPE_MODE (type) == BLKmode
5551 && TREE_CODE (type) == ARRAY_TYPE
5552 && TYPE_SIZE (type) == NULL_TREE
5553 && TYPE_DOMAIN (type) != NULL_TREE
5554 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5559 if (!warned && warn_psabi)
5562 inform (input_location,
5563 "The ABI of passing struct with"
5564 " a flexible array member has"
5565 " changed in GCC 4.4");
5569 num = classify_argument (TYPE_MODE (type), type,
5571 (int_bit_position (field)
5572 + bit_offset) % 256);
5575 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5576 for (i = 0; i < num && (i + pos) < words; i++)
5578 merge_classes (subclasses[i], classes[i + pos]);
5585 /* Arrays are handled as small records. */
5588 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5589 TREE_TYPE (type), subclasses, bit_offset);
5593 /* The partial classes are now full classes. */
5594 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5595 subclasses[0] = X86_64_SSE_CLASS;
5596 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5597 && !((bit_offset % 64) == 0 && bytes == 4))
5598 subclasses[0] = X86_64_INTEGER_CLASS;
5600 for (i = 0; i < words; i++)
5601 classes[i] = subclasses[i % num];
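/* E.g. for a double[2] field the element classifies as SSEDF and is
   replicated across both eightbytes by the loop above, so the array
   ends up in two SSE registers (an illustrative reading, assuming no
   later merges demote the class).  */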
5606 case QUAL_UNION_TYPE:
5607 /* Unions are similar to RECORD_TYPE but the offset is always 0.
5609 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5611 if (TREE_CODE (field) == FIELD_DECL)
5615 if (TREE_TYPE (field) == error_mark_node)
5618 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5619 TREE_TYPE (field), subclasses,
5623 for (i = 0; i < num; i++)
5624 classes[i] = merge_classes (subclasses[i], classes[i]);
5635 /* When size > 16 bytes, if the first eightbyte isn't
5636 X86_64_SSE_CLASS or any of the others isn't
5637 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5639 if (classes[0] != X86_64_SSE_CLASS)
5642 for (i = 1; i < words; i++)
5643 if (classes[i] != X86_64_SSEUP_CLASS)
5647 /* Final merger cleanup. */
5648 for (i = 0; i < words; i++)
5650 /* If one class is MEMORY, everything should be passed in memory. */
5652 if (classes[i] == X86_64_MEMORY_CLASS)
5655 /* X86_64_SSEUP_CLASS should always be preceded by
5656 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5657 if (classes[i] == X86_64_SSEUP_CLASS
5658 && classes[i - 1] != X86_64_SSE_CLASS
5659 && classes[i - 1] != X86_64_SSEUP_CLASS)
5661 /* The first one should never be X86_64_SSEUP_CLASS. */
5662 gcc_assert (i != 0);
5663 classes[i] = X86_64_SSE_CLASS;
5666 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5667 everything should be passed in memory. */
5668 if (classes[i] == X86_64_X87UP_CLASS
5669 && (classes[i - 1] != X86_64_X87_CLASS))
5673 /* The first one should never be X86_64_X87UP_CLASS. */
5674 gcc_assert (i != 0);
5675 if (!warned && warn_psabi)
5678 inform (input_location,
5679 "The ABI of passing union with long double"
5680 " has changed in GCC 4.4");
5688 /* Compute the alignment needed. We align all types to their natural
5689 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5690 if (mode != VOIDmode && mode != BLKmode)
5692 int mode_alignment = GET_MODE_BITSIZE (mode);
5695 mode_alignment = 128;
5696 else if (mode == XCmode)
5697 mode_alignment = 256;
5698 if (COMPLEX_MODE_P (mode))
5699 mode_alignment /= 2;
5700 /* Misaligned fields are always returned in memory. */
5701 if (bit_offset % mode_alignment)
5705 /* For V1xx modes, just use the base mode. */
5706 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5707 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5708 mode = GET_MODE_INNER (mode);
5710 /* Classification of atomic types. */
5715 classes[0] = X86_64_SSE_CLASS;
5718 classes[0] = X86_64_SSE_CLASS;
5719 classes[1] = X86_64_SSEUP_CLASS;
5729 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5733 classes[0] = X86_64_INTEGERSI_CLASS;
5736 else if (size <= 64)
5738 classes[0] = X86_64_INTEGER_CLASS;
5741 else if (size <= 64+32)
5743 classes[0] = X86_64_INTEGER_CLASS;
5744 classes[1] = X86_64_INTEGERSI_CLASS;
5747 else if (size <= 64+64)
5749 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5757 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
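/* Size-tier sketch: a 128-bit integer at a 64-bit aligned offset
   falls into the size <= 64+64 tier, so both eightbytes classify as
   INTEGER and the value occupies a pair of general-purpose
   registers.  */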
5761 /* OImode shouldn't be used directly. */
5766 if (!(bit_offset % 64))
5767 classes[0] = X86_64_SSESF_CLASS;
5769 classes[0] = X86_64_SSE_CLASS;
5772 classes[0] = X86_64_SSEDF_CLASS;
5775 classes[0] = X86_64_X87_CLASS;
5776 classes[1] = X86_64_X87UP_CLASS;
5779 classes[0] = X86_64_SSE_CLASS;
5780 classes[1] = X86_64_SSEUP_CLASS;
5783 classes[0] = X86_64_SSE_CLASS;
5784 if (!(bit_offset % 64))
5790 if (!warned && warn_psabi)
5793 inform (input_location,
5794 "The ABI of passing structure with complex float"
5795 " member has changed in GCC 4.4");
5797 classes[1] = X86_64_SSESF_CLASS;
5801 classes[0] = X86_64_SSEDF_CLASS;
5802 classes[1] = X86_64_SSEDF_CLASS;
5805 classes[0] = X86_64_COMPLEX_X87_CLASS;
5808 /* These modes are larger than 16 bytes. */
5816 classes[0] = X86_64_SSE_CLASS;
5817 classes[1] = X86_64_SSEUP_CLASS;
5818 classes[2] = X86_64_SSEUP_CLASS;
5819 classes[3] = X86_64_SSEUP_CLASS;
5827 classes[0] = X86_64_SSE_CLASS;
5828 classes[1] = X86_64_SSEUP_CLASS;
5836 classes[0] = X86_64_SSE_CLASS;
5842 gcc_assert (VECTOR_MODE_P (mode));
5847 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5849 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5850 classes[0] = X86_64_INTEGERSI_CLASS;
5852 classes[0] = X86_64_INTEGER_CLASS;
5853 classes[1] = X86_64_INTEGER_CLASS;
5854 return 1 + (bytes > 8);
5858 /* Examine the argument and set the number of registers required in each
5859 class. Return 0 iff the parameter should be passed in memory. */
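/* For instance (illustrative): struct { long a; double b; } yields
   *int_nregs == 1 and *sse_nregs == 1, while a structure classified
   as MEMORY makes this function return 0.  */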
5861 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5862 int *int_nregs, int *sse_nregs)
5864 enum x86_64_reg_class regclass[MAX_CLASSES];
5865 int n = classify_argument (mode, type, regclass, 0);
5871 for (n--; n >= 0; n--)
5872 switch (regclass[n])
5874 case X86_64_INTEGER_CLASS:
5875 case X86_64_INTEGERSI_CLASS:
5878 case X86_64_SSE_CLASS:
5879 case X86_64_SSESF_CLASS:
5880 case X86_64_SSEDF_CLASS:
5883 case X86_64_NO_CLASS:
5884 case X86_64_SSEUP_CLASS:
5886 case X86_64_X87_CLASS:
5887 case X86_64_X87UP_CLASS:
5891 case X86_64_COMPLEX_X87_CLASS:
5892 return in_return ? 2 : 0;
5893 case X86_64_MEMORY_CLASS:
5899 /* Construct a container for the argument used by the GCC interface. See
5900 FUNCTION_ARG for the detailed description. */
5903 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5904 const_tree type, int in_return, int nintregs, int nsseregs,
5905 const int *intreg, int sse_regno)
5907 /* The following variables hold the static issued_error state. */
5908 static bool issued_sse_arg_error;
5909 static bool issued_sse_ret_error;
5910 static bool issued_x87_ret_error;
5912 enum machine_mode tmpmode;
5914 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5915 enum x86_64_reg_class regclass[MAX_CLASSES];
5919 int needed_sseregs, needed_intregs;
5920 rtx exp[MAX_CLASSES];
5923 n = classify_argument (mode, type, regclass, 0);
5926 if (!examine_argument (mode, type, in_return, &needed_intregs,
5929 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5932 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5933 some less clueful developer tries to use floating-point anyway. */
5934 if (needed_sseregs && !TARGET_SSE)
5938 if (!issued_sse_ret_error)
5940 error ("SSE register return with SSE disabled");
5941 issued_sse_ret_error = true;
5944 else if (!issued_sse_arg_error)
5946 error ("SSE register argument with SSE disabled");
5947 issued_sse_arg_error = true;
5952 /* Likewise, error if the ABI requires us to return values in the
5953 x87 registers and the user specified -mno-80387. */
5954 if (!TARGET_80387 && in_return)
5955 for (i = 0; i < n; i++)
5956 if (regclass[i] == X86_64_X87_CLASS
5957 || regclass[i] == X86_64_X87UP_CLASS
5958 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5960 if (!issued_x87_ret_error)
5962 error ("x87 register return with x87 disabled");
5963 issued_x87_ret_error = true;
5968 /* First construct simple cases. Avoid SCmode, since we want to use a
5969 single register to pass this type. */
5970 if (n == 1 && mode != SCmode)
5971 switch (regclass[0])
5973 case X86_64_INTEGER_CLASS:
5974 case X86_64_INTEGERSI_CLASS:
5975 return gen_rtx_REG (mode, intreg[0]);
5976 case X86_64_SSE_CLASS:
5977 case X86_64_SSESF_CLASS:
5978 case X86_64_SSEDF_CLASS:
5979 if (mode != BLKmode)
5980 return gen_reg_or_parallel (mode, orig_mode,
5981 SSE_REGNO (sse_regno));
5983 case X86_64_X87_CLASS:
5984 case X86_64_COMPLEX_X87_CLASS:
5985 return gen_rtx_REG (mode, FIRST_STACK_REG);
5986 case X86_64_NO_CLASS:
5987 /* Zero sized array, struct or class. */
5992 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5993 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5994 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5996 && regclass[0] == X86_64_SSE_CLASS
5997 && regclass[1] == X86_64_SSEUP_CLASS
5998 && regclass[2] == X86_64_SSEUP_CLASS
5999 && regclass[3] == X86_64_SSEUP_CLASS
6001 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6004 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6005 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6006 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6007 && regclass[1] == X86_64_INTEGER_CLASS
6008 && (mode == CDImode || mode == TImode || mode == TFmode)
6009 && intreg[0] + 1 == intreg[1])
6010 return gen_rtx_REG (mode, intreg[0]);
6012 /* Otherwise figure out the entries of the PARALLEL. */
6013 for (i = 0; i < n; i++)
6017 switch (regclass[i])
6019 case X86_64_NO_CLASS:
6021 case X86_64_INTEGER_CLASS:
6022 case X86_64_INTEGERSI_CLASS:
6023 /* Merge TImodes on aligned occasions here too. */
6024 if (i * 8 + 8 > bytes)
6025 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6026 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6030 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6031 if (tmpmode == BLKmode)
6033 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6034 gen_rtx_REG (tmpmode, *intreg),
6038 case X86_64_SSESF_CLASS:
6039 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6040 gen_rtx_REG (SFmode,
6041 SSE_REGNO (sse_regno)),
6045 case X86_64_SSEDF_CLASS:
6046 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6047 gen_rtx_REG (DFmode,
6048 SSE_REGNO (sse_regno)),
6052 case X86_64_SSE_CLASS:
6060 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6070 && regclass[1] == X86_64_SSEUP_CLASS
6071 && regclass[2] == X86_64_SSEUP_CLASS
6072 && regclass[3] == X86_64_SSEUP_CLASS);
6079 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6080 gen_rtx_REG (tmpmode,
6081 SSE_REGNO (sse_regno)),
6090 /* Empty aligned struct, union or class. */
6094 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6095 for (i = 0; i < nexps; i++)
6096 XVECEXP (ret, 0, i) = exp [i];
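/* The PARALLEL built above looks like, e.g. for struct { long a;
   double b; } (illustrative RTL; the actual registers depend on the
   cumulative argument state):
     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])
   where each expr_list pairs a hard register with its byte offset
   into the argument.  */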
6100 /* Update the data in CUM to advance over an argument of mode MODE
6101 and data type TYPE. (TYPE is null for libcalls where that information
6102 may not be available.) */
6105 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6106 const_tree type, HOST_WIDE_INT bytes,
6107 HOST_WIDE_INT words)
6123 cum->words += words;
6124 cum->nregs -= words;
6125 cum->regno += words;
6127 if (cum->nregs <= 0)
6135 /* OImode shouldn't be used directly. */
6139 if (cum->float_in_sse < 2)
6142 if (cum->float_in_sse < 1)
6159 if (!type || !AGGREGATE_TYPE_P (type))
6161 cum->sse_words += words;
6162 cum->sse_nregs -= 1;
6163 cum->sse_regno += 1;
6164 if (cum->sse_nregs <= 0)
6178 if (!type || !AGGREGATE_TYPE_P (type))
6180 cum->mmx_words += words;
6181 cum->mmx_nregs -= 1;
6182 cum->mmx_regno += 1;
6183 if (cum->mmx_nregs <= 0)
6194 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6195 const_tree type, HOST_WIDE_INT words, bool named)
6197 int int_nregs, sse_nregs;
6199 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6200 if (!named && VALID_AVX256_REG_MODE (mode))
6203 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6204 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6206 cum->nregs -= int_nregs;
6207 cum->sse_nregs -= sse_nregs;
6208 cum->regno += int_nregs;
6209 cum->sse_regno += sse_nregs;
6213 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6214 cum->words = (cum->words + align - 1) & ~(align - 1);
6215 cum->words += words;
6220 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6221 HOST_WIDE_INT words)
6223 /* Otherwise, this should be passed indirectly. */
6224 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6226 cum->words += words;
6234 /* Update the data in CUM to advance over an argument of mode MODE and
6235 data type TYPE. (TYPE is null for libcalls where that information
6236 may not be available.) */
6239 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6240 const_tree type, bool named)
6242 HOST_WIDE_INT bytes, words;
6244 if (mode == BLKmode)
6245 bytes = int_size_in_bytes (type);
6247 bytes = GET_MODE_SIZE (mode);
6248 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6251 mode = type_natural_mode (type, NULL);
6253 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6254 function_arg_advance_ms_64 (cum, bytes, words);
6255 else if (TARGET_64BIT)
6256 function_arg_advance_64 (cum, mode, type, words, named);
6258 function_arg_advance_32 (cum, mode, type, bytes, words);
6261 /* Define where to put the arguments to a function.
6262 Value is zero to push the argument on the stack,
6263 or a hard register in which to store the argument.
6265 MODE is the argument's machine mode.
6266 TYPE is the data type of the argument (as a tree).
6267 This is null for libcalls where that information may
6269 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6270 the preceding args and about the function being called.
6271 NAMED is nonzero if this argument is a named parameter
6272 (otherwise it is an extra parameter matching an ellipsis). */
6275 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6276 enum machine_mode orig_mode, const_tree type,
6277 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6279 static bool warnedsse, warnedmmx;
6281 /* Avoid the AL settings for the Unix64 ABI. */
6282 if (mode == VOIDmode)
6298 if (words <= cum->nregs)
6300 int regno = cum->regno;
6302 /* Fastcall allocates the first two DWORD (SImode) or
6303 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6309 || (type && AGGREGATE_TYPE_P (type)))
6312 /* ECX, not EAX, is the first allocated register. */
6313 if (regno == AX_REG)
6316 return gen_rtx_REG (mode, regno);
6321 if (cum->float_in_sse < 2)
6324 if (cum->float_in_sse < 1)
6328 /* In 32-bit mode, we pass TImode in xmm registers. */
6335 if (!type || !AGGREGATE_TYPE_P (type))
6337 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6340 warning (0, "SSE vector argument without SSE enabled "
6344 return gen_reg_or_parallel (mode, orig_mode,
6345 cum->sse_regno + FIRST_SSE_REG);
6350 /* OImode shouldn't be used directly. */
6359 if (!type || !AGGREGATE_TYPE_P (type))
6362 return gen_reg_or_parallel (mode, orig_mode,
6363 cum->sse_regno + FIRST_SSE_REG);
6373 if (!type || !AGGREGATE_TYPE_P (type))
6375 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6378 warning (0, "MMX vector argument without MMX enabled "
6382 return gen_reg_or_parallel (mode, orig_mode,
6383 cum->mmx_regno + FIRST_MMX_REG);
6392 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6393 enum machine_mode orig_mode, const_tree type, bool named)
6395 /* Handle a hidden AL argument containing the number of SSE registers
6396 used for varargs x86-64 functions. */
6397 if (mode == VOIDmode)
6398 return GEN_INT (cum->maybe_vaarg
6399 ? (cum->sse_nregs < 0
6400 ? X86_64_SSE_REGPARM_MAX
6415 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6421 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6423 &x86_64_int_parameter_registers [cum->regno],
6428 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6429 enum machine_mode orig_mode, bool named,
6430 HOST_WIDE_INT bytes)
6434 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6435 We use the value -2 to specify that the current function call is MSABI. */
6436 if (mode == VOIDmode)
6437 return GEN_INT (-2);
6439 /* If we've run out of registers, it goes on the stack. */
6440 if (cum->nregs == 0)
6443 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6445 /* Only floating point modes are passed in anything but integer regs. */
6446 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6449 regno = cum->regno + FIRST_SSE_REG;
6454 /* Unnamed floating parameters are passed in both the
6455 SSE and integer registers. */
6456 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6457 t2 = gen_rtx_REG (mode, regno);
6458 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6459 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6460 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
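/* E.g. an unnamed double in the second MS-ABI argument slot is
   described by a PARALLEL naming both %xmm1 and %rdx, so a varargs
   callee can read it back from the integer register image (a sketch
   of the intent of the code above).  */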
6463 /* Handle aggregate types passed in registers. */
6464 if (orig_mode == BLKmode)
6466 if (bytes > 0 && bytes <= 8)
6467 mode = (bytes > 4 ? DImode : SImode);
6468 if (mode == BLKmode)
6472 return gen_reg_or_parallel (mode, orig_mode, regno);
6475 /* Return where to put the arguments to a function.
6476 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6478 MODE is the argument's machine mode. TYPE is the data type of the
6479 argument. It is null for libcalls where that information may not be
6480 available. CUM gives information about the preceding args and about
6481 the function being called. NAMED is nonzero if this argument is a
6482 named parameter (otherwise it is an extra parameter matching an
6486 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6487 const_tree type, bool named)
6489 enum machine_mode mode = omode;
6490 HOST_WIDE_INT bytes, words;
6492 if (mode == BLKmode)
6493 bytes = int_size_in_bytes (type);
6495 bytes = GET_MODE_SIZE (mode);
6496 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6498 /* To simplify the code below, represent vector types with a vector mode
6499 even if MMX/SSE are not active. */
6500 if (type && TREE_CODE (type) == VECTOR_TYPE)
6501 mode = type_natural_mode (type, cum);
6503 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6504 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6505 else if (TARGET_64BIT)
6506 return function_arg_64 (cum, mode, omode, type, named);
6508 return function_arg_32 (cum, mode, omode, type, bytes, words);
6511 /* A C expression that indicates when an argument must be passed by
6512 reference. If nonzero for an argument, a copy of that argument is
6513 made in memory and a pointer to the argument is passed instead of
6514 the argument itself. The pointer is passed in whatever way is
6515 appropriate for passing a pointer to that type. */
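/* A quick summary of the checks below (illustrative): under the
   Windows x64 convention only values of 1, 2, 4 or 8 bytes travel by
   value; arrays and, say, a 12-byte struct are copied to memory and a
   pointer is passed instead.  */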
6518 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6519 enum machine_mode mode ATTRIBUTE_UNUSED,
6520 const_tree type, bool named ATTRIBUTE_UNUSED)
6522 /* See Windows x64 Software Convention. */
6523 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6525 int msize = (int) GET_MODE_SIZE (mode);
6528 /* Arrays are passed by reference. */
6529 if (TREE_CODE (type) == ARRAY_TYPE)
6532 if (AGGREGATE_TYPE_P (type))
6534 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6535 are passed by reference. */
6536 msize = int_size_in_bytes (type);
6540 /* __m128 is passed by reference. */
6542 case 1: case 2: case 4: case 8:
6548 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6554 /* Return true when TYPE should be 128bit aligned for 32bit argument passing. */
6557 contains_aligned_value_p (const_tree type)
6559 enum machine_mode mode = TYPE_MODE (type);
6560 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6564 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6566 if (TYPE_ALIGN (type) < 128)
6569 if (AGGREGATE_TYPE_P (type))
6571 /* Walk the aggregates recursively. */
6572 switch (TREE_CODE (type))
6576 case QUAL_UNION_TYPE:
6580 /* Walk all the structure fields. */
6581 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6583 if (TREE_CODE (field) == FIELD_DECL
6584 && contains_aligned_value_p (TREE_TYPE (field)))
6591 /* Just for use if some language passes arrays by value. */
6592 if (contains_aligned_value_p (TREE_TYPE (type)))
6603 /* Gives the alignment boundary, in bits, of an argument with the
6604 specified mode and type. */
6607 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6612 /* Since the main variant type is used for the call, convert the type
6613 to its main variant. */
6614 type = TYPE_MAIN_VARIANT (type);
6615 align = TYPE_ALIGN (type);
6618 align = GET_MODE_ALIGNMENT (mode);
6619 if (align < PARM_BOUNDARY)
6620 align = PARM_BOUNDARY;
6621 /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
6622 natural boundaries. */
6623 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6625 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
6626 make an exception for SSE modes since these require 128bit alignment.
6629 The handling here differs from field_alignment. ICC aligns MMX
6630 arguments to 4 byte boundaries, while structure fields are aligned
6631 to 8 byte boundaries. */
6634 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6635 align = PARM_BOUNDARY;
6639 if (!contains_aligned_value_p (type))
6640 align = PARM_BOUNDARY;
6643 if (align > BIGGEST_ALIGNMENT)
6644 align = BIGGEST_ALIGNMENT;
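/* Example boundaries (a sketch): on ia32 a plain int argument stays
   at PARM_BOUNDARY (32 bits), while an SSE vector such as __m128
   keeps its natural 128-bit boundary via the checks above.  */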
6648 /* Return true if N is a possible register number of function value. */
6651 ix86_function_value_regno_p (const unsigned int regno)
6658 case FIRST_FLOAT_REG:
6659 /* TODO: The function should depend on the current function ABI but
6660 builtins.c would need updating then. Therefore we use the default ABI. */
6662 if (TARGET_64BIT && ix86_abi == MS_ABI)
6664 return TARGET_FLOAT_RETURNS_IN_80387;
6670 if (TARGET_MACHO || TARGET_64BIT)
6678 /* Define how to find the value returned by a function.
6679 VALTYPE is the data type of the value (as a tree).
6680 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6681 otherwise, FUNC is 0. */
6684 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6685 const_tree fntype, const_tree fn)
6689 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6690 we normally prevent this case when mmx is not available. However
6691 some ABIs may require the result to be returned like DImode. */
6692 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6693 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6695 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6696 we prevent this case when sse is not available. However some ABIs
6697 may require the result to be returned like integer TImode. */
6698 else if (mode == TImode
6699 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6700 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6702 /* 32-byte vector modes in %ymm0. */
6703 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6704 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6706 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6707 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6708 regno = FIRST_FLOAT_REG;
6710 /* Most things go in %eax. */
6713 /* Override FP return register with %xmm0 for local functions when
6714 SSE math is enabled or for functions with sseregparm attribute. */
6715 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6717 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6718 if ((sse_level >= 1 && mode == SFmode)
6719 || (sse_level == 2 && mode == DFmode))
6720 regno = FIRST_SSE_REG;
6723 /* OImode shouldn't be used directly. */
6724 gcc_assert (mode != OImode);
6726 return gen_rtx_REG (orig_mode, regno);
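/* Illustrative ia32 return registers given the logic above: int in
   %eax, float/double in %st(0) by default or in %xmm0 when the
   sseregparm logic applies, and 16-byte vectors in %xmm0 when SSE is
   enabled.  */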
6730 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6735 /* Handle libcalls, which don't provide a type node. */
6736 if (valtype == NULL)
6748 return gen_rtx_REG (mode, FIRST_SSE_REG);
6751 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6755 return gen_rtx_REG (mode, AX_REG);
6759 ret = construct_container (mode, orig_mode, valtype, 1,
6760 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6761 x86_64_int_return_registers, 0);
6763 /* For zero sized structures, construct_container returns NULL, but we
6764 need to keep the rest of the compiler happy by returning a meaningful value. */
6766 ret = gen_rtx_REG (orig_mode, AX_REG);
6772 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6774 unsigned int regno = AX_REG;
6778 switch (GET_MODE_SIZE (mode))
6781 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6782 && !COMPLEX_MODE_P (mode))
6783 regno = FIRST_SSE_REG;
6787 if (mode == SFmode || mode == DFmode)
6788 regno = FIRST_SSE_REG;
6794 return gen_rtx_REG (orig_mode, regno);
6798 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6799 enum machine_mode orig_mode, enum machine_mode mode)
6801 const_tree fn, fntype;
6804 if (fntype_or_decl && DECL_P (fntype_or_decl))
6805 fn = fntype_or_decl;
6806 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6808 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6809 return function_value_ms_64 (orig_mode, mode);
6810 else if (TARGET_64BIT)
6811 return function_value_64 (orig_mode, mode, valtype);
6813 return function_value_32 (orig_mode, mode, fntype, fn);
6817 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6818 bool outgoing ATTRIBUTE_UNUSED)
6820 enum machine_mode mode, orig_mode;
6822 orig_mode = TYPE_MODE (valtype);
6823 mode = type_natural_mode (valtype, NULL);
6824 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6828 ix86_libcall_value (enum machine_mode mode)
6830 return ix86_function_value_1 (NULL, NULL, mode, mode);
6833 /* Return true iff TYPE is returned in memory. */
6835 static bool ATTRIBUTE_UNUSED
6836 return_in_memory_32 (const_tree type, enum machine_mode mode)
6840 if (mode == BLKmode)
6843 size = int_size_in_bytes (type);
6845 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6848 if (VECTOR_MODE_P (mode) || mode == TImode)
6850 /* User-created vectors small enough to fit in EAX. */
6854 /* MMX/3dNow values are returned in MM0,
6855 except when it doesn't exist or the ABI prescribes otherwise. */
6857 return !TARGET_MMX || TARGET_VECT8_RETURNS;
6859 /* SSE values are returned in XMM0, except when it doesn't exist. */
6863 /* AVX values are returned in YMM0, except when it doesn't exist. */
6874 /* OImode shouldn't be used directly. */
6875 gcc_assert (mode != OImode);
6880 static bool ATTRIBUTE_UNUSED
6881 return_in_memory_64 (const_tree type, enum machine_mode mode)
6883 int needed_intregs, needed_sseregs;
6884 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6887 static bool ATTRIBUTE_UNUSED
6888 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6890 HOST_WIDE_INT size = int_size_in_bytes (type);
6892 /* __m128 is returned in xmm0. */
6893 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6894 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6897 /* Otherwise, the size must be exactly 1, 2, 4 or 8. */
6898 return size != 1 && size != 2 && size != 4 && size != 8;
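/* E.g. __m128 comes back in xmm0 and an 8-byte struct in %rax, while
   a 12-byte struct fails the size test above and is returned via a
   hidden memory slot (an illustrative summary).  */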
6902 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6904 #ifdef SUBTARGET_RETURN_IN_MEMORY
6905 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6907 const enum machine_mode mode = type_natural_mode (type, NULL);
6911 if (ix86_function_type_abi (fntype) == MS_ABI)
6912 return return_in_memory_ms_64 (type, mode);
6914 return return_in_memory_64 (type, mode);
6917 return return_in_memory_32 (type, mode);
6921 /* When returning SSE vector types, we have a choice of either
6922 (1) being ABI incompatible with a -march switch, or
6923 (2) generating an error.
6924 Given no good solution, I think the safest thing is one warning.
6925 The user won't be able to use -Werror, but....
6927 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6928 called in response to actually generating a caller or callee that
6929 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6930 via aggregate_value_p for general type probing from tree-ssa. */
6933 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6935 static bool warnedsse, warnedmmx;
6937 if (!TARGET_64BIT && type)
6939 /* Look at the return type of the function, not the function type. */
6940 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6942 if (!TARGET_SSE && !warnedsse)
6945 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6948 warning (0, "SSE vector return without SSE enabled "
6953 if (!TARGET_MMX && !warnedmmx)
6955 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6958 warning (0, "MMX vector return without MMX enabled "
6968 /* Create the va_list data type. */
6970 /* Returns the calling convention specific va_list data type.
6971 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6974 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6976 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6978 /* For i386 we use a plain pointer to the argument area. */
6979 if (!TARGET_64BIT || abi == MS_ABI)
6980 return build_pointer_type (char_type_node);
6982 record = lang_hooks.types.make_type (RECORD_TYPE);
6983 type_decl = build_decl (BUILTINS_LOCATION,
6984 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6986 f_gpr = build_decl (BUILTINS_LOCATION,
6987 FIELD_DECL, get_identifier ("gp_offset"),
6988 unsigned_type_node);
6989 f_fpr = build_decl (BUILTINS_LOCATION,
6990 FIELD_DECL, get_identifier ("fp_offset"),
6991 unsigned_type_node);
6992 f_ovf = build_decl (BUILTINS_LOCATION,
6993 FIELD_DECL, get_identifier ("overflow_arg_area"),
6995 f_sav = build_decl (BUILTINS_LOCATION,
6996 FIELD_DECL, get_identifier ("reg_save_area"),
6999 va_list_gpr_counter_field = f_gpr;
7000 va_list_fpr_counter_field = f_fpr;
7002 DECL_FIELD_CONTEXT (f_gpr) = record;
7003 DECL_FIELD_CONTEXT (f_fpr) = record;
7004 DECL_FIELD_CONTEXT (f_ovf) = record;
7005 DECL_FIELD_CONTEXT (f_sav) = record;
7007 TREE_CHAIN (record) = type_decl;
7008 TYPE_NAME (record) = type_decl;
7009 TYPE_FIELDS (record) = f_gpr;
7010 DECL_CHAIN (f_gpr) = f_fpr;
7011 DECL_CHAIN (f_fpr) = f_ovf;
7012 DECL_CHAIN (f_ovf) = f_sav;
7014 layout_type (record);
7016 /* The correct type is an array type of one element. */
7017 return build_array_type (record, build_index_type (size_zero_node));
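/* The record laid out above corresponds to the familiar SysV x86-64
   va_list; a sketch of the equivalent C (the pointer field types are
   assumed, the rest follows the builders above):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */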
7020 /* Set up the builtin va_list data type and, for 64-bit, the additional
7021 calling convention specific va_list data types. */
7024 ix86_build_builtin_va_list (void)
7026 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7028 /* Initialize ABI specific va_list builtin types. */
7032 if (ix86_abi == MS_ABI)
7034 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7035 if (TREE_CODE (t) != RECORD_TYPE)
7036 t = build_variant_type_copy (t);
7037 sysv_va_list_type_node = t;
7042 if (TREE_CODE (t) != RECORD_TYPE)
7043 t = build_variant_type_copy (t);
7044 sysv_va_list_type_node = t;
7046 if (ix86_abi != MS_ABI)
7048 t = ix86_build_builtin_va_list_abi (MS_ABI);
7049 if (TREE_CODE (t) != RECORD_TYPE)
7050 t = build_variant_type_copy (t);
7051 ms_va_list_type_node = t;
7056 if (TREE_CODE (t) != RECORD_TYPE)
7057 t = build_variant_type_copy (t);
7058 ms_va_list_type_node = t;
7065 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7068 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7074 /* GPR size of varargs save area. */
7075 if (cfun->va_list_gpr_size)
7076 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7078 ix86_varargs_gpr_size = 0;
7080 /* FPR size of varargs save area. We don't need it if we don't pass
7081 anything in SSE registers. */
7082 if (TARGET_SSE && cfun->va_list_fpr_size)
7083 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7085 ix86_varargs_fpr_size = 0;
7087 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7090 save_area = frame_pointer_rtx;
7091 set = get_varargs_alias_set ();
7093 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7094 if (max > X86_64_REGPARM_MAX)
7095 max = X86_64_REGPARM_MAX;
7097 for (i = cum->regno; i < max; i++)
7099 mem = gen_rtx_MEM (Pmode,
7100 plus_constant (save_area, i * UNITS_PER_WORD));
7101 MEM_NOTRAP_P (mem) = 1;
7102 set_mem_alias_set (mem, set);
7103 emit_move_insn (mem, gen_rtx_REG (Pmode,
7104 x86_64_int_parameter_registers[i]));
7107 if (ix86_varargs_fpr_size)
7109 enum machine_mode smode;
7112 /* Now emit code to save SSE registers. The AX parameter contains the
7113 number of SSE parameter registers used to call this function, though all
7114 we actually check here is the zero/non-zero status. */
7116 label = gen_label_rtx ();
7117 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7118 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7121 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7122 we used movdqa (i.e. TImode) instead? Perhaps even better would
7123 be if we could determine the real mode of the data, via a hook
7124 into pass_stdarg. Ignore all that for now. */
7126 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7127 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7129 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7130 if (max > X86_64_SSE_REGPARM_MAX)
7131 max = X86_64_SSE_REGPARM_MAX;
7133 for (i = cum->sse_regno; i < max; ++i)
7135 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7136 mem = gen_rtx_MEM (smode, mem);
7137 MEM_NOTRAP_P (mem) = 1;
7138 set_mem_alias_set (mem, set);
7139 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7141 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
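/* Resulting register save area layout (illustrative): the integer
   argument registers at offsets 0, 8, ..., then the SSE argument
   registers at ix86_varargs_gpr_size + i*16, matching the
   gp_offset/fp_offset values that va_start stores.  */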
7149 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7151 alias_set_type set = get_varargs_alias_set ();
7154 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7158 mem = gen_rtx_MEM (Pmode,
7159 plus_constant (virtual_incoming_args_rtx,
7160 i * UNITS_PER_WORD));
7161 MEM_NOTRAP_P (mem) = 1;
7162 set_mem_alias_set (mem, set);
7164 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7165 emit_move_insn (mem, reg);
7170 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7171 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7174 CUMULATIVE_ARGS next_cum;
7177 /* This argument doesn't appear to be used anymore. Which is good,
7178 because the old code here didn't suppress rtl generation. */
7179 gcc_assert (!no_rtl);
7184 fntype = TREE_TYPE (current_function_decl);
7186 /* For varargs, we do not want to skip the dummy va_dcl argument.
7187 For stdargs, we do want to skip the last named argument. */
7189 if (stdarg_p (fntype))
7190 ix86_function_arg_advance (&next_cum, mode, type, true);
7192 if (cum->call_abi == MS_ABI)
7193 setup_incoming_varargs_ms_64 (&next_cum);
7195 setup_incoming_varargs_64 (&next_cum);
7198 /* Check whether TYPE is a va_list of the char * kind. */
7201 is_va_list_char_pointer (tree type)
7205 /* For 32-bit it is always true. */
7208 canonic = ix86_canonical_va_list_type (type);
7209 return (canonic == ms_va_list_type_node
7210 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7213 /* Implement va_start. */
7216 ix86_va_start (tree valist, rtx nextarg)
7218 HOST_WIDE_INT words, n_gpr, n_fpr;
7219 tree f_gpr, f_fpr, f_ovf, f_sav;
7220 tree gpr, fpr, ovf, sav, t;
7225 if (flag_split_stack
7226 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7228 unsigned int scratch_regno;
7230 /* When we are splitting the stack, we can't refer to the stack
7231 arguments using internal_arg_pointer, because they may be on
7232 the old stack. The split stack prologue will arrange to
7233 leave a pointer to the old stack arguments in a scratch
7234 register, which we here copy to a pseudo-register. The split
7235 stack prologue can't set the pseudo-register directly because
7236 it (the prologue) runs before any registers have been saved. */
7238 scratch_regno = split_stack_prologue_scratch_regno ();
7239 if (scratch_regno != INVALID_REGNUM)
7243 reg = gen_reg_rtx (Pmode);
7244 cfun->machine->split_stack_varargs_pointer = reg;
7247 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7251 push_topmost_sequence ();
7252 emit_insn_after (seq, entry_of_function ());
7253 pop_topmost_sequence ();
7257 /* Only 64-bit targets need something special. */
7258 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7260 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7261 std_expand_builtin_va_start (valist, nextarg);
7266 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7267 next = expand_binop (ptr_mode, add_optab,
7268 cfun->machine->split_stack_varargs_pointer,
7269 crtl->args.arg_offset_rtx,
7270 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7271 convert_move (va_r, next, 0);
7276 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7277 f_fpr = DECL_CHAIN (f_gpr);
7278 f_ovf = DECL_CHAIN (f_fpr);
7279 f_sav = DECL_CHAIN (f_ovf);
7281 valist = build_simple_mem_ref (valist);
7282 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7283 /* The following should be folded into the MEM_REF offset. */
7284 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7286 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7288 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7290 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7293 /* Count number of gp and fp argument registers used. */
7294 words = crtl->args.info.words;
7295 n_gpr = crtl->args.info.regno;
7296 n_fpr = crtl->args.info.sse_regno;
7298 if (cfun->va_list_gpr_size)
7300 type = TREE_TYPE (gpr);
7301 t = build2 (MODIFY_EXPR, type,
7302 gpr, build_int_cst (type, n_gpr * 8));
7303 TREE_SIDE_EFFECTS (t) = 1;
7304 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7307 if (TARGET_SSE && cfun->va_list_fpr_size)
7309 type = TREE_TYPE (fpr);
7310 t = build2 (MODIFY_EXPR, type, fpr,
7311 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7312 TREE_SIDE_EFFECTS (t) = 1;
7313 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7316 /* Find the overflow area. */
7317 type = TREE_TYPE (ovf);
7318 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7319 ovf_rtx = crtl->args.internal_arg_pointer;
7321 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7322 t = make_tree (type, ovf_rtx);
7324 t = build2 (POINTER_PLUS_EXPR, type, t,
7325 size_int (words * UNITS_PER_WORD));
7326 t = build2 (MODIFY_EXPR, type, ovf, t);
7327 TREE_SIDE_EFFECTS (t) = 1;
7328 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7330 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7332 /* Find the register save area.
7333 The prologue of the function saves it right above the stack frame. */
7334 type = TREE_TYPE (sav);
7335 t = make_tree (type, frame_pointer_rtx);
7336 if (!ix86_varargs_gpr_size)
7337 t = build2 (POINTER_PLUS_EXPR, type, t,
7338 size_int (-8 * X86_64_REGPARM_MAX));
7339 t = build2 (MODIFY_EXPR, type, sav, t);
7340 TREE_SIDE_EFFECTS (t) = 1;
7341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
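/* Net effect of va_start (a sketch): gp_offset = n_gpr * 8,
   fp_offset = X86_64_REGPARM_MAX * 8 + n_fpr * 16, overflow_arg_area
   points just past the named stack arguments, and reg_save_area
   points at the block the prologue saved.  */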
7345 /* Implement va_arg. */
7348 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7351 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7352 tree f_gpr, f_fpr, f_ovf, f_sav;
7353 tree gpr, fpr, ovf, sav, t;
7355 tree lab_false, lab_over = NULL_TREE;
7360 enum machine_mode nat_mode;
7361 unsigned int arg_boundary;
7363 /* Only 64-bit targets need something special. */
7364 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7365 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7367 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7368 f_fpr = DECL_CHAIN (f_gpr);
7369 f_ovf = DECL_CHAIN (f_fpr);
7370 f_sav = DECL_CHAIN (f_ovf);
7372 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7373 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7374 valist = build_va_arg_indirect_ref (valist);
7375 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7376 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7377 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7379 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7381 type = build_pointer_type (type);
7382 size = int_size_in_bytes (type);
7383 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7385 nat_mode = type_natural_mode (type, NULL);
7394 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7395 if (ix86_cfun_abi () == SYSV_ABI)
7402 container = construct_container (nat_mode, TYPE_MODE (type),
7403 type, 0, X86_64_REGPARM_MAX,
7404 X86_64_SSE_REGPARM_MAX, intreg,
7409 /* Pull the value out of the saved registers. */
7411 addr = create_tmp_var (ptr_type_node, "addr");
7415 int needed_intregs, needed_sseregs;
7417 tree int_addr, sse_addr;
7419 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7420 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7422 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7424 need_temp = (!REG_P (container)
7425 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7426 || TYPE_ALIGN (type) > 128));
7428 /* In case we are passing a structure, verify that it is a consecutive
7429 block in the register save area. If not, we need to do moves. */
7430 if (!need_temp && !REG_P (container))
7432 /* Verify that all registers are strictly consecutive. */
7433 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7437 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7439 rtx slot = XVECEXP (container, 0, i);
7440 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7441 || INTVAL (XEXP (slot, 1)) != i * 16)
7449 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7451 rtx slot = XVECEXP (container, 0, i);
7452 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7453 || INTVAL (XEXP (slot, 1)) != i * 8)
7465 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7466 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7469 /* First ensure that we fit completely in registers. */
7472 t = build_int_cst (TREE_TYPE (gpr),
7473 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7474 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7475 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7476 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7477 gimplify_and_add (t, pre_p);
7481 t = build_int_cst (TREE_TYPE (fpr),
7482 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7483 + X86_64_REGPARM_MAX * 8);
7484 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7485 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7486 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7487 gimplify_and_add (t, pre_p);
7490 /* Compute index to start of area used for integer regs. */
7493 /* int_addr = gpr + sav; */
7494 t = fold_convert (sizetype, gpr);
7495 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7496 gimplify_assign (int_addr, t, pre_p);
7500 /* sse_addr = fpr + sav; */
7501 t = fold_convert (sizetype, fpr);
7502 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7503 gimplify_assign (sse_addr, t, pre_p);
7507 int i, prev_size = 0;
7508 tree temp = create_tmp_var (type, "va_arg_tmp");
7511 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7512 gimplify_assign (addr, t, pre_p);
7514 for (i = 0; i < XVECLEN (container, 0); i++)
7516 rtx slot = XVECEXP (container, 0, i);
7517 rtx reg = XEXP (slot, 0);
7518 enum machine_mode mode = GET_MODE (reg);
7524 tree dest_addr, dest;
7525 int cur_size = GET_MODE_SIZE (mode);
7527 if (prev_size + cur_size > size)
7529 cur_size = size - prev_size;
7530 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7531 if (mode == BLKmode)
7534 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7535 if (mode == GET_MODE (reg))
7536 addr_type = build_pointer_type (piece_type);
7538 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7540 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7543 if (SSE_REGNO_P (REGNO (reg)))
7545 src_addr = sse_addr;
7546 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7550 src_addr = int_addr;
7551 src_offset = REGNO (reg) * 8;
7553 src_addr = fold_convert (addr_type, src_addr);
7554 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7555 size_int (src_offset));
7557 dest_addr = fold_convert (daddr_type, addr);
7558 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7559 size_int (INTVAL (XEXP (slot, 1))));
7560 if (cur_size == GET_MODE_SIZE (mode))
7562 src = build_va_arg_indirect_ref (src_addr);
7563 dest = build_va_arg_indirect_ref (dest_addr);
7565 gimplify_assign (dest, src, pre_p);
7570 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7571 3, dest_addr, src_addr,
7572 size_int (cur_size));
7573 gimplify_and_add (copy, pre_p);
7575 prev_size += cur_size;
7581 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7582 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7583 gimplify_assign (gpr, t, pre_p);
7588 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7589 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7590 gimplify_assign (fpr, t, pre_p);
7593 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7595 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7598 /* ... otherwise out of the overflow area. */
7600 /* When we align a parameter on the stack for the caller, if its
7601 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7602 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
7603 here with the caller. */
7604 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7605 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7606 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7608 /* Care for on-stack alignment if needed. */
7609 if (arg_boundary <= 64 || size == 0)
7613 HOST_WIDE_INT align = arg_boundary / 8;
7614 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7615 size_int (align - 1));
7616 t = fold_convert (sizetype, t);
7617 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7619 t = fold_convert (TREE_TYPE (ovf), t);
7622 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7623 gimplify_assign (addr, t, pre_p);
7625 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7626 size_int (rsize * UNITS_PER_WORD));
7627 gimplify_assign (unshare_expr (ovf), t, pre_p);
7630 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7632 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7633 addr = fold_convert (ptrtype, addr);
7636 addr = build_va_arg_indirect_ref (addr);
7637 return build_va_arg_indirect_ref (addr);
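/* The gimple emitted above corresponds roughly to this pseudo-C for a
   single integer argument (illustrative; the bounds come from the
   GE_EXPR checks built earlier):

     if (gp_offset >= X86_64_REGPARM_MAX * 8) goto overflow;
     addr = reg_save_area + gp_offset;  gp_offset += 8;  goto done;
   overflow:
     addr = overflow_arg_area;  overflow_arg_area += 8;
   done:
     result = *(T *) addr;  */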
7640 /* Return true if OPNUM's MEM should be matched
7641 in movabs* patterns. */
7644 ix86_check_movabs (rtx insn, int opnum)
7648 set = PATTERN (insn);
7649 if (GET_CODE (set) == PARALLEL)
7650 set = XVECEXP (set, 0, 0);
7651 gcc_assert (GET_CODE (set) == SET);
7652 mem = XEXP (set, opnum);
7653 while (GET_CODE (mem) == SUBREG)
7654 mem = SUBREG_REG (mem);
7655 gcc_assert (MEM_P (mem));
7656 return volatile_ok || !MEM_VOLATILE_P (mem);
7659 /* Initialize the table of extra 80387 mathematical constants. */
7662 init_ext_80387_constants (void)
7664 static const char * cst[5] =
7666 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7667 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7668 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7669 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7670 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7674 for (i = 0; i < 5; i++)
7676 real_from_string (&ext_80387_constants_table[i], cst[i]);
7677 /* Ensure each constant is rounded to XFmode precision. */
7678 real_convert (&ext_80387_constants_table[i],
7679 XFmode, &ext_80387_constants_table[i]);
7682 ext_80387_constants_init = 1;
7685 /* Return non-zero if the constant is something that
7686 can be loaded with a special instruction. */
7689 standard_80387_constant_p (rtx x)
7691 enum machine_mode mode = GET_MODE (x);
7695 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7698 if (x == CONST0_RTX (mode))
7700 if (x == CONST1_RTX (mode))
7703 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7705 /* For XFmode constants, try to find a special 80387 instruction when
7706 optimizing for size or on those CPUs that benefit from them. */
7708 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7712 if (! ext_80387_constants_init)
7713 init_ext_80387_constants ();
7715 for (i = 0; i < 5; i++)
7716 if (real_identical (&r, &ext_80387_constants_table[i]))
7720 /* A load of the constant -0.0 or -1.0 will be split into an
7721 fldz;fchs or fld1;fchs sequence. */
7722 if (real_isnegzero (&r))
7724 if (real_identical (&r, &dconstm1))
7730 /* Return the opcode of the special instruction to be used to load the constant X. */
7734 standard_80387_constant_opcode (rtx x)
7736 switch (standard_80387_constant_p (x))
7760 /* Return the CONST_DOUBLE representing the 80387 constant that is
7761 loaded by the specified special instruction. The argument IDX
7762 matches the return value from standard_80387_constant_p. */
7765 standard_80387_constant_rtx (int idx)
7769 if (! ext_80387_constants_init)
7770 init_ext_80387_constants ();
7786 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7790 /* Return 1 if X is all 0s and 2 if X is all 1s
7791 in a supported SSE vector mode. */
7794 standard_sse_constant_p (rtx x)
7796 enum machine_mode mode = GET_MODE (x);
7798 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7800 if (vector_all_ones_operand (x, mode))
7816 /* Return the opcode of the special instruction to be used to load the constant X. */
7820 standard_sse_constant_opcode (rtx insn, rtx x)
7822 switch (standard_sse_constant_p (x))
7825 switch (get_attr_mode (insn))
7828 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7830 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7831 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7833 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7835 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7836 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7838 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7840 return "vxorps\t%x0, %x0, %x0";
7842 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7843 return "vxorps\t%x0, %x0, %x0";
7845 return "vxorpd\t%x0, %x0, %x0";
7847 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7848 return "vxorps\t%x0, %x0, %x0";
7850 return "vpxor\t%x0, %x0, %x0";
7855 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7862 /* Returns true if OP contains a symbol reference. */
7865 symbolic_reference_mentioned_p (rtx op)
7870 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7873 fmt = GET_RTX_FORMAT (GET_CODE (op));
7874 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7880 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7881 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7885 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7892 /* Return true if it is appropriate to emit `ret' instructions in the
7893 body of a function. Do this only if the epilogue is simple, needing a
7894 couple of insns. Prior to reloading, we can't tell how many registers
7895 must be saved, so return false then. Return false if there is no frame
7896 marker to de-allocate. */
7899 ix86_can_use_return_insn_p (void)
7901 struct ix86_frame frame;
7903 if (! reload_completed || frame_pointer_needed)
7906 /* Don't allow more than 32k pop, since that's all we can do
7907 with one instruction. */
7908 if (crtl->args.pops_args && crtl->args.size >= 32768)
7911 ix86_compute_frame_layout (&frame);
7912 return (frame.stack_pointer_offset == UNITS_PER_WORD
7913 && (frame.nregs + frame.nsseregs) == 0);
7916 /* Value should be nonzero if functions must have frame pointers.
7917 Zero means the frame pointer need not be set up (and parms may
7918 be accessed via the stack pointer) in functions that seem suitable. */
7921 ix86_frame_pointer_required (void)
7923 /* If we accessed previous frames, then the generated code expects
7924 to be able to access the saved ebp value in our frame. */
7925 if (cfun->machine->accesses_prev_frame)
7928 /* Several x86 OSes need a frame pointer for other reasons,
7929 usually pertaining to setjmp. */
7930 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7933 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7934 turns off the frame pointer by default. Turn it back on now if
7935 we've not got a leaf function. */
7936 if (TARGET_OMIT_LEAF_FRAME_POINTER
7937 && (!current_function_is_leaf
7938 || ix86_current_function_calls_tls_descriptor))
7941 if (crtl->profile && !flag_fentry)
7947 /* Record that the current function accesses previous call frames. */
7950 ix86_setup_frame_addresses (void)
7952 cfun->machine->accesses_prev_frame = 1;
7955 #ifndef USE_HIDDEN_LINKONCE
7956 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7957 # define USE_HIDDEN_LINKONCE 1
7959 # define USE_HIDDEN_LINKONCE 0
7963 static int pic_labels_used;
7965 /* Fills in the label name that should be used for a pc thunk for
7966 the given register. */
7969 get_pc_thunk_name (char name[32], unsigned int regno)
7971 gcc_assert (!TARGET_64BIT);
7973 if (USE_HIDDEN_LINKONCE)
7974 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7976 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
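/* E.g. for %ebx this yields the conventional hidden symbol
   "__i686.get_pc_thunk.bx"; without hidden linkonce support a local
   "LPR" label is generated instead.  */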
7980 /* This function generates code for -fpic that loads %ebx with
7981 the return address of the caller and then returns. */
7984 ix86_code_end (void)
7989 for (regno = AX_REG; regno <= SP_REG; regno++)
7994 if (!(pic_labels_used & (1 << regno)))
7997 get_pc_thunk_name (name, regno);
7999 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8000 get_identifier (name),
8001 build_function_type (void_type_node, void_list_node));
8002 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8003 NULL_TREE, void_type_node);
8004 TREE_PUBLIC (decl) = 1;
8005 TREE_STATIC (decl) = 1;
8010 switch_to_section (darwin_sections[text_coal_section]);
8011 fputs ("\t.weak_definition\t", asm_out_file);
8012 assemble_name (asm_out_file, name);
8013 fputs ("\n\t.private_extern\t", asm_out_file);
8014 assemble_name (asm_out_file, name);
8015 putc ('\n', asm_out_file);
8016 ASM_OUTPUT_LABEL (asm_out_file, name);
8017 DECL_WEAK (decl) = 1;
8021 if (USE_HIDDEN_LINKONCE)
8023 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8025 targetm.asm_out.unique_section (decl, 0);
8026 switch_to_section (get_named_section (decl, NULL, 0));
8028 targetm.asm_out.globalize_label (asm_out_file, name);
8029 fputs ("\t.hidden\t", asm_out_file);
8030 assemble_name (asm_out_file, name);
8031 putc ('\n', asm_out_file);
8032 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8036 switch_to_section (text_section);
8037 ASM_OUTPUT_LABEL (asm_out_file, name);
8040 DECL_INITIAL (decl) = make_node (BLOCK);
8041 current_function_decl = decl;
8042 init_function_start (decl);
8043 first_function_block_is_cold = false;
8044 /* Make sure unwind info is emitted for the thunk if needed. */
8045 final_start_function (emit_barrier (), asm_out_file, 1);
8047 /* Pad stack IP move with 4 instructions (two NOPs count
8048 as one instruction). */
8049 if (TARGET_PAD_SHORT_FUNCTION)
8054 fputs ("\tnop\n", asm_out_file);
8057 xops[0] = gen_rtx_REG (Pmode, regno);
8058 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8059 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8060 fputs ("\tret\n", asm_out_file);
8061 final_end_function ();
8062 init_insn_lengths ();
8063 free_after_compilation (cfun);
8065 current_function_decl = NULL;
8068 if (flag_split_stack)
8069 file_end_indicate_split_stack ();
8072 /* Emit code for the SET_GOT patterns. */
8075 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8081 if (TARGET_VXWORKS_RTP && flag_pic)
8083 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8084 xops[2] = gen_rtx_MEM (Pmode,
8085 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8086 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8088 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8089 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8090 an unadorned address. */
8091 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8092 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8093 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8097 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8099 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8101 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8104 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8107 output_asm_insn ("call\t%a2", xops);
8108 #ifdef DWARF2_UNWIND_INFO
8109 /* The call to the next label acts as a push. */
8110 if (dwarf2out_do_frame ())
8114 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8115 gen_rtx_PLUS (Pmode,
8118 RTX_FRAME_RELATED_P (insn) = 1;
8119 dwarf2out_frame_debug (insn, true);
8126 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8127 is what will be referenced by the Mach-O PIC subsystem. */
8129 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8132 targetm.asm_out.internal_label (asm_out_file, "L",
8133 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8137 output_asm_insn ("pop%z0\t%0", xops);
8138 #ifdef DWARF2_UNWIND_INFO
8139 /* The pop is a pop and clobbers dest, but doesn't restore it
8140 for unwind info purposes. */
8141 if (dwarf2out_do_frame ())
8145 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8146 dwarf2out_frame_debug (insn, true);
8147 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8148 gen_rtx_PLUS (Pmode,
8151 RTX_FRAME_RELATED_P (insn) = 1;
8152 dwarf2out_frame_debug (insn, true);
8161 get_pc_thunk_name (name, REGNO (dest));
8162 pic_labels_used |= 1 << REGNO (dest);
8164 #ifdef DWARF2_UNWIND_INFO
8165 /* Ensure all queued register saves are flushed before the
8167 if (dwarf2out_do_frame ())
8168 dwarf2out_flush_queued_reg_saves ();
8170 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8171 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8172 output_asm_insn ("call\t%X2", xops);
8173 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8174 is what will be referenced by the Mach-O PIC subsystem. */
8177 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8179 targetm.asm_out.internal_label (asm_out_file, "L",
8180 CODE_LABEL_NUMBER (label));
8187 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8188 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8190 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8195 /* Generate a "push" pattern for input ARG. */
8200 struct machine_function *m = cfun->machine;
8202 if (m->fs.cfa_reg == stack_pointer_rtx)
8203 m->fs.cfa_offset += UNITS_PER_WORD;
8204 m->fs.sp_offset += UNITS_PER_WORD;
8206 return gen_rtx_SET (VOIDmode,
8208 gen_rtx_PRE_DEC (Pmode,
8209 stack_pointer_rtx)),
8213 /* Generate a "pop" pattern for input ARG. */
8218 return gen_rtx_SET (VOIDmode,
8221 gen_rtx_POST_INC (Pmode,
8222 stack_pointer_rtx)));
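/* Illustratively, in 32-bit mode gen_push (reg) builds the RTL
     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ...))
   and gen_pop (reg) builds
     (set (reg:SI ...) (mem:SI (post_inc:SI (reg:SI sp))))
   which match the push and pop instruction patterns.  */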
8225 /* Return >= 0 if there is an unused call-clobbered register available
8226 for the entire function. */
8229 ix86_select_alt_pic_regnum (void)
8231 if (current_function_is_leaf
8233 && !ix86_current_function_calls_tls_descriptor)
8236 /* Can't use the same register for both PIC and DRAP. */
8238 drap = REGNO (crtl->drap_reg);
8241 for (i = 2; i >= 0; --i)
8242 if (i != drap && !df_regs_ever_live_p (i))
8246 return INVALID_REGNUM;
8249 /* Return 1 if we need to save REGNO. */
8251 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8253 if (pic_offset_table_rtx
8254 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8255 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8257 || crtl->calls_eh_return
8258 || crtl->uses_const_pool))
8260 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8265 if (crtl->calls_eh_return && maybe_eh_return)
8270 unsigned test = EH_RETURN_DATA_REGNO (i);
8271 if (test == INVALID_REGNUM)
8278 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8281 return (df_regs_ever_live_p (regno)
8282 && !call_used_regs[regno]
8283 && !fixed_regs[regno]
8284 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8287 /* Return number of saved general purpose registers. */
8290 ix86_nsaved_regs (void)
8295 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8296 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8301 /* Return number of saved SSE registers. */
8304 ix86_nsaved_sseregs (void)
8309 if (ix86_cfun_abi () != MS_ABI)
8311 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8312 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8317 /* Given FROM and TO register numbers, say whether this elimination is
8318 allowed. If stack alignment is needed, we can only replace argument
8319 pointer with hard frame pointer, or replace frame pointer with stack
8320 pointer. Otherwise, frame pointer elimination is automatically
8321 handled and all other eliminations are valid. */
8324 ix86_can_eliminate (const int from, const int to)
8326 if (stack_realign_fp)
8327 return ((from == ARG_POINTER_REGNUM
8328 && to == HARD_FRAME_POINTER_REGNUM)
8329 || (from == FRAME_POINTER_REGNUM
8330 && to == STACK_POINTER_REGNUM));
8332 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8335 /* Return the offset between two registers, one to be eliminated, and the other
8336 its replacement, at the start of a routine. */
8339 ix86_initial_elimination_offset (int from, int to)
8341 struct ix86_frame frame;
8342 ix86_compute_frame_layout (&frame);
8344 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8345 return frame.hard_frame_pointer_offset;
8346 else if (from == FRAME_POINTER_REGNUM
8347 && to == HARD_FRAME_POINTER_REGNUM)
8348 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8351 gcc_assert (to == STACK_POINTER_REGNUM);
8353 if (from == ARG_POINTER_REGNUM)
8354 return frame.stack_pointer_offset;
8356 gcc_assert (from == FRAME_POINTER_REGNUM);
8357 return frame.stack_pointer_offset - frame.frame_pointer_offset;
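/* As a worked example, assume a 32-bit function with a frame pointer
   whose hard_frame_pointer_offset is 8 (return address plus saved
   %ebp).  Eliminating the arg pointer to %ebp then yields 8, while
   eliminating it to %esp yields the full stack_pointer_offset, which
   additionally counts the register save area and the local frame.  */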
8361 /* In a dynamically-aligned function, we can't know the offset from
8362 stack pointer to frame pointer, so we must ensure that setjmp
8363 eliminates fp against the hard fp (%ebp) rather than trying to
8364 index from %esp up to the top of the frame across a gap that is
8365 of unknown (at compile-time) size. */
8367 ix86_builtin_setjmp_frame_value (void)
8369 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8372 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8373 field in the TCB, so they can not be used together. */
8376 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8380 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8382 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8385 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8388 error ("%<-fsplit-stack%> requires "
8389 "assembler support for CFI directives");
8397 /* When using -fsplit-stack, the allocation routines set a field in
8398 the TCB to the bottom of the stack plus this much space, measured in bytes. */
8401 #define SPLIT_STACK_AVAILABLE 256
8403 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
8406 ix86_compute_frame_layout (struct ix86_frame *frame)
8408 unsigned int stack_alignment_needed;
8409 HOST_WIDE_INT offset;
8410 unsigned int preferred_alignment;
8411 HOST_WIDE_INT size = get_frame_size ();
8412 HOST_WIDE_INT to_allocate;
8414 frame->nregs = ix86_nsaved_regs ();
8415 frame->nsseregs = ix86_nsaved_sseregs ();
8417 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8418 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8420 /* The MS ABI seems to require stack alignment to always be 16, except in
8421 function prologues and leaf functions. */
8422 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8423 && (!current_function_is_leaf || cfun->calls_alloca != 0
8424 || ix86_current_function_calls_tls_descriptor))
8426 preferred_alignment = 16;
8427 stack_alignment_needed = 16;
8428 crtl->preferred_stack_boundary = 128;
8429 crtl->stack_alignment_needed = 128;
8432 gcc_assert (!size || stack_alignment_needed);
8433 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8434 gcc_assert (preferred_alignment <= stack_alignment_needed);
8436 /* During reload iteration the number of registers saved can change.
8437 Recompute the value as needed. Do not recompute when the number of
8438 registers didn't change, as reload does multiple calls to the function
8439 and does not expect the decision to change within a single iteration. */
8440 if (!optimize_function_for_size_p (cfun)
8441 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8443 int count = frame->nregs;
8444 struct cgraph_node *node = cgraph_node (current_function_decl);
8446 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8447 /* The fast prologue uses move instead of push to save registers. This
8448 is significantly longer, but also executes faster, as modern hardware
8449 can execute the moves in parallel but can't do that for push/pop.
8451 Be careful about choosing which prologue to emit: when the function
8452 takes many instructions to execute, we may as well use the slow
8453 version, and likewise when the function is known to be outside a hot
8454 spot (this is known with feedback only). Weight the size of the
8455 function by the number of registers to save, as it is cheap to use
8456 one or two push instructions but very slow to use many of them. */
8458 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8459 if (node->frequency < NODE_FREQUENCY_NORMAL
8460 || (flag_branch_probabilities
8461 && node->frequency < NODE_FREQUENCY_HOT))
8462 cfun->machine->use_fast_prologue_epilogue = false;
8464 cfun->machine->use_fast_prologue_epilogue
8465 = !expensive_function_p (count);
8467 if (TARGET_PROLOGUE_USING_MOVE
8468 && cfun->machine->use_fast_prologue_epilogue)
8469 frame->save_regs_using_mov = true;
8471 frame->save_regs_using_mov = false;
8473 /* If static stack checking is enabled and done with probes, the registers
8474 need to be saved before allocating the frame. */
8475 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8476 frame->save_regs_using_mov = false;
8478 /* Skip return address. */
8479 offset = UNITS_PER_WORD;
8481 /* Skip pushed static chain. */
8482 if (ix86_static_chain_on_stack)
8483 offset += UNITS_PER_WORD;
8485 /* Skip saved base pointer. */
8486 if (frame_pointer_needed)
8487 offset += UNITS_PER_WORD;
8489 frame->hard_frame_pointer_offset = offset;
8491 /* Register save area */
8492 offset += frame->nregs * UNITS_PER_WORD;
8493 frame->reg_save_offset = offset;
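/* E.g., on 32-bit with a frame pointer, no pushed static chain and
   three call-saved registers to save: offset = 4 (return address)
   + 4 (saved %ebp) = 8, so hard_frame_pointer_offset is 8 and, after
   the 3 * 4 byte save area, reg_save_offset is 20.  */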
8495 /* Align and set SSE register save area. */
8496 if (frame->nsseregs)
8498 /* The only ABI that has saved SSE registers (Win64) also has a
8499 16-byte aligned default stack, and thus we don't need to be
8500 within the re-aligned local stack frame to save them. */
8501 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8502 offset = (offset + 16 - 1) & -16;
8503 offset += frame->nsseregs * 16;
8505 frame->sse_reg_save_offset = offset;
8507 /* The re-aligned stack starts here. Values before this point are not
8508 directly comparable with values below this point. In order to make
8509 sure that no value happens to be the same before and after, force
8510 the alignment computation below to add a non-zero value. */
8511 if (stack_realign_fp)
8512 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8515 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8516 offset += frame->va_arg_size;
8518 /* Align start of frame for local function. */
8519 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
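/* The expression above rounds OFFSET up to the next multiple of the
   alignment: e.g. with offset == 23 and stack_alignment_needed == 16,
   (23 + 15) & -16 == 38 & -16 == 32.  */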
8521 /* Frame pointer points here. */
8522 frame->frame_pointer_offset = offset;
8526 /* Add outgoing arguments area. Can be skipped if we eliminated
8527 all the function calls as dead code.
8528 Skipping is however impossible when function calls alloca. Alloca
8529 expander assumes that last crtl->outgoing_args_size
8530 of stack frame are unused. */
8531 if (ACCUMULATE_OUTGOING_ARGS
8532 && (!current_function_is_leaf || cfun->calls_alloca
8533 || ix86_current_function_calls_tls_descriptor))
8535 offset += crtl->outgoing_args_size;
8536 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8539 frame->outgoing_arguments_size = 0;
8541 /* Align stack boundary. Only needed if we're calling another function
8543 if (!current_function_is_leaf || cfun->calls_alloca
8544 || ix86_current_function_calls_tls_descriptor)
8545 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8547 /* We've reached end of stack frame. */
8548 frame->stack_pointer_offset = offset;
8550 /* Size prologue needs to allocate. */
8551 to_allocate = offset - frame->sse_reg_save_offset;
8553 if ((!to_allocate && frame->nregs <= 1)
8554 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8555 frame->save_regs_using_mov = false;
8557 if (ix86_using_red_zone ()
8558 && current_function_sp_is_unchanging
8559 && current_function_is_leaf
8560 && !ix86_current_function_calls_tls_descriptor)
8562 frame->red_zone_size = to_allocate;
8563 if (frame->save_regs_using_mov)
8564 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8565 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8566 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8569 frame->red_zone_size = 0;
8570 frame->stack_pointer_offset -= frame->red_zone_size;
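/* E.g., assuming the usual x86-64 values RED_ZONE_SIZE == 128 and
   RED_ZONE_RESERVE == 8: a leaf function with 40 bytes of locals and
   two registers saved via mov gets red_zone_size == 40 + 16 == 56,
   well under the 120-byte cap, so stack_pointer_offset shrinks by 56
   and the frame lives entirely in the red zone.  */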
8573 /* This is semi-inlined memory_address_length, but simplified
8574 since we know that we're always dealing with reg+offset, and
8575 to avoid having to create and discard all that rtl. */
8578 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8584 /* EBP and R13 cannot be encoded without an offset. */
8585 len = (regno == BP_REG || regno == R13_REG);
8587 else if (IN_RANGE (offset, -128, 127))
8590 /* ESP and R12 must be encoded with a SIB byte. */
8591 if (regno == SP_REG || regno == R12_REG)
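/* Example encodings under this scheme: (%ebp) needs a disp8 even with
   offset 0, so len is 1; (%esp + 8) needs both a disp8 and a SIB byte,
   so len is 2; (%eax) costs no extra bytes; and any offset outside
   [-128, 127] costs a 4-byte displacement.  */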
8597 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8598 The valid base registers are taken from CFUN->MACHINE->FS. */
8601 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8603 const struct machine_function *m = cfun->machine;
8604 rtx base_reg = NULL;
8605 HOST_WIDE_INT base_offset = 0;
8607 if (m->use_fast_prologue_epilogue)
8609 /* Choose the base register most likely to allow the most scheduling
8610 opportunities. Generally FP is valid throughout the function,
8611 while DRAP must be reloaded within the epilogue. But choose either
8612 over the SP due to increased encoding size. */
8616 base_reg = hard_frame_pointer_rtx;
8617 base_offset = m->fs.fp_offset - cfa_offset;
8619 else if (m->fs.drap_valid)
8621 base_reg = crtl->drap_reg;
8622 base_offset = 0 - cfa_offset;
8624 else if (m->fs.sp_valid)
8626 base_reg = stack_pointer_rtx;
8627 base_offset = m->fs.sp_offset - cfa_offset;
8632 HOST_WIDE_INT toffset;
8635 /* Choose the base register with the smallest address encoding.
8636 With a tie, choose FP > DRAP > SP. */
8639 base_reg = stack_pointer_rtx;
8640 base_offset = m->fs.sp_offset - cfa_offset;
8641 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8643 if (m->fs.drap_valid)
8645 toffset = 0 - cfa_offset;
8646 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8649 base_reg = crtl->drap_reg;
8650 base_offset = toffset;
8656 toffset = m->fs.fp_offset - cfa_offset;
8657 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8660 base_reg = hard_frame_pointer_rtx;
8661 base_offset = toffset;
8666 gcc_assert (base_reg != NULL);
8668 return plus_constant (base_reg, base_offset);
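/* For instance, with fp_offset == 16, sp_offset == 48 and a requested
   cfa_offset of 40, the SP-based address has displacement 8 and the
   FP-based one -24; both encode in a single byte, so the FP > DRAP > SP
   tie-break above picks the frame pointer.  */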
8671 /* Emit code to save registers in the prologue. */
8674 ix86_emit_save_regs (void)
8679 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8680 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8682 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8683 RTX_FRAME_RELATED_P (insn) = 1;
8687 /* Emit a single register save at CFA - CFA_OFFSET. */
8690 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8691 HOST_WIDE_INT cfa_offset)
8693 struct machine_function *m = cfun->machine;
8694 rtx reg = gen_rtx_REG (mode, regno);
8695 rtx mem, addr, base, insn;
8697 addr = choose_baseaddr (cfa_offset);
8698 mem = gen_frame_mem (mode, addr);
8700 /* For SSE saves, we need to indicate the 128-bit alignment. */
8701 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8703 insn = emit_move_insn (mem, reg);
8704 RTX_FRAME_RELATED_P (insn) = 1;
8707 if (GET_CODE (base) == PLUS)
8708 base = XEXP (base, 0);
8709 gcc_checking_assert (REG_P (base));
8711 /* When saving registers into a re-aligned local stack frame, avoid
8712 any tricky guessing by dwarf2out. */
8713 if (m->fs.realigned)
8715 gcc_checking_assert (stack_realign_drap);
8717 if (regno == REGNO (crtl->drap_reg))
8719 /* A bit of a hack. We force the DRAP register to be saved in
8720 the re-aligned stack frame, which provides us with a copy
8721 of the CFA that will last past the prologue. Install it. */
8722 gcc_checking_assert (cfun->machine->fs.fp_valid);
8723 addr = plus_constant (hard_frame_pointer_rtx,
8724 cfun->machine->fs.fp_offset - cfa_offset);
8725 mem = gen_rtx_MEM (mode, addr);
8726 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8730 /* The frame pointer is a stable reference within the
8731 aligned frame. Use it. */
8732 gcc_checking_assert (cfun->machine->fs.fp_valid);
8733 addr = plus_constant (hard_frame_pointer_rtx,
8734 cfun->machine->fs.fp_offset - cfa_offset);
8735 mem = gen_rtx_MEM (mode, addr);
8736 add_reg_note (insn, REG_CFA_EXPRESSION,
8737 gen_rtx_SET (VOIDmode, mem, reg));
8741 /* The memory may not be relative to the current CFA register,
8742 which means that we may need to generate a new pattern for
8743 use by the unwind info. */
8744 else if (base != m->fs.cfa_reg)
8746 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8747 mem = gen_rtx_MEM (mode, addr);
8748 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8752 /* Emit code to save registers using MOV insns.
8753 First register is stored at CFA - CFA_OFFSET. */
8755 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8759 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8760 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8762 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8763 cfa_offset -= UNITS_PER_WORD;
8767 /* Emit code to save SSE registers using MOV insns.
8768 First register is stored at CFA - CFA_OFFSET. */
8770 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8774 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8775 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8777 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8782 static GTY(()) rtx queued_cfa_restores;
8784 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8785 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8786 Don't add the note if the previously saved value will be left untouched
8787 within the stack red zone until return, as unwinders can find the same
8788 value in the register and on the stack. */
8791 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8793 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8798 add_reg_note (insn, REG_CFA_RESTORE, reg);
8799 RTX_FRAME_RELATED_P (insn) = 1;
8803 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8806 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8809 ix86_add_queued_cfa_restore_notes (rtx insn)
8812 if (!queued_cfa_restores)
8814 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8816 XEXP (last, 1) = REG_NOTES (insn);
8817 REG_NOTES (insn) = queued_cfa_restores;
8818 queued_cfa_restores = NULL_RTX;
8819 RTX_FRAME_RELATED_P (insn) = 1;
8822 /* Expand prologue or epilogue stack adjustment.
8823 The pattern exists to put a dependency on all ebp-based memory accesses.
8824 STYLE should be negative if instructions should be marked as frame related,
8825 zero if the %r11 register is live and cannot be freely used, and positive
8829 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8830 int style, bool set_cfa)
8832 struct machine_function *m = cfun->machine;
8836 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
8837 else if (x86_64_immediate_operand (offset, DImode))
8838 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
8842 /* r11 is used by indirect sibcall return as well, set before the
8843 epilogue and used after the epilogue. */
8845 tmp = gen_rtx_REG (DImode, R11_REG);
8848 gcc_assert (src != hard_frame_pointer_rtx
8849 && dest != hard_frame_pointer_rtx);
8850 tmp = hard_frame_pointer_rtx;
8852 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8854 RTX_FRAME_RELATED_P (insn) = 1;
8856 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
8859 insn = emit_insn (insn);
8861 ix86_add_queued_cfa_restore_notes (insn);
8867 gcc_assert (m->fs.cfa_reg == src);
8868 m->fs.cfa_offset += INTVAL (offset);
8869 m->fs.cfa_reg = dest;
8871 r = gen_rtx_PLUS (Pmode, src, offset);
8872 r = gen_rtx_SET (VOIDmode, dest, r);
8873 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8874 RTX_FRAME_RELATED_P (insn) = 1;
8877 RTX_FRAME_RELATED_P (insn) = 1;
8879 if (dest == stack_pointer_rtx)
8881 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8882 bool valid = m->fs.sp_valid;
8884 if (src == hard_frame_pointer_rtx)
8886 valid = m->fs.fp_valid;
8887 ooffset = m->fs.fp_offset;
8889 else if (src == crtl->drap_reg)
8891 valid = m->fs.drap_valid;
8896 /* Else there are two possibilities: SP itself, which we set
8897 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
8898 taken care of by hand along the eh_return path. */
8899 gcc_checking_assert (src == stack_pointer_rtx
8900 || offset == const0_rtx);
8903 m->fs.sp_offset = ooffset - INTVAL (offset);
8904 m->fs.sp_valid = valid;
8908 /* Find an available register to be used as the dynamic realign argument
8909 pointer register. Such a register will be written in the prologue and
8910 used at the beginning of the body, so it must not be
8911 1. a parameter passing register.
8913 We reuse the static-chain register if it is available. Otherwise, we
8914 use DI for i386 and R13 for x86-64. We chose R13 since it has
8917 Return: the regno of chosen register. */
8920 find_drap_reg (void)
8922 tree decl = cfun->decl;
8926 /* Use R13 for a nested function or a function that needs a static chain.
8927 Since a function with a tail call may use any caller-saved
8928 register in the epilogue, DRAP must not use a caller-saved
8929 register in that case. */
8930 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8937 /* Use DI for a nested function or a function that needs a static chain.
8938 Since a function with a tail call may use any caller-saved
8939 register in the epilogue, DRAP must not use a caller-saved
8940 register in that case. */
8941 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8944 /* Reuse static chain register if it isn't used for parameter
8946 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8947 && !lookup_attribute ("fastcall",
8948 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8949 && !lookup_attribute ("thiscall",
8950 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8957 /* Return minimum incoming stack alignment. */
8960 ix86_minimum_incoming_stack_boundary (bool sibcall)
8962 unsigned int incoming_stack_boundary;
8964 /* Prefer the one specified at command line. */
8965 if (ix86_user_incoming_stack_boundary)
8966 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8967 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8968 when -mstackrealign is used, this isn't a sibcall check, and the
8969 estimated stack alignment is 128 bits. */
8972 && ix86_force_align_arg_pointer
8973 && crtl->stack_alignment_estimated == 128)
8974 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8976 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8978 /* Incoming stack alignment can be changed on individual functions
8979 via force_align_arg_pointer attribute. We use the smallest
8980 incoming stack boundary. */
8981 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8982 && lookup_attribute (ix86_force_align_arg_pointer_string,
8983 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8984 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8986 /* The incoming stack frame has to be aligned at least at
8987 parm_stack_boundary. */
8988 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8989 incoming_stack_boundary = crtl->parm_stack_boundary;
8991 /* Stack at entrance of main is aligned by runtime. We use the
8992 smallest incoming stack boundary. */
8993 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8994 && DECL_NAME (current_function_decl)
8995 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8996 && DECL_FILE_SCOPE_P (current_function_decl))
8997 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8999 return incoming_stack_boundary;
9002 /* Update incoming stack boundary and estimated stack alignment. */
9005 ix86_update_stack_boundary (void)
9007 ix86_incoming_stack_boundary
9008 = ix86_minimum_incoming_stack_boundary (false);
9010 /* x86_64 varargs need 16-byte stack alignment for the register save
9014 && crtl->stack_alignment_estimated < 128)
9015 crtl->stack_alignment_estimated = 128;
9018 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9019 needed or an rtx for DRAP otherwise. */
9022 ix86_get_drap_rtx (void)
9024 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9025 crtl->need_drap = true;
9027 if (stack_realign_drap)
9029 /* Assign DRAP to vDRAP and return vDRAP. */
9030 unsigned int regno = find_drap_reg ();
9035 arg_ptr = gen_rtx_REG (Pmode, regno);
9036 crtl->drap_reg = arg_ptr;
9039 drap_vreg = copy_to_reg (arg_ptr);
9043 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9046 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9047 RTX_FRAME_RELATED_P (insn) = 1;
9055 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9058 ix86_internal_arg_pointer (void)
9060 return virtual_incoming_args_rtx;
9063 struct scratch_reg {
9068 /* Return a short-lived scratch register for use on function entry.
9069 In 32-bit mode, it is valid only after the registers are saved
9070 in the prologue. This register must be released by means of
9071 release_scratch_register_on_entry once it is dead. */
9074 get_scratch_register_on_entry (struct scratch_reg *sr)
9082 /* We always use R11 in 64-bit mode. */
9087 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9089 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9090 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9091 int regparm = ix86_function_regparm (fntype, decl);
9093 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9095 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9096 for the static chain register. */
9097 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9098 && drap_regno != AX_REG)
9100 else if (regparm < 2 && drap_regno != DX_REG)
9102 /* ecx is the static chain register. */
9103 else if (regparm < 3 && !fastcall_p && !static_chain_p
9104 && drap_regno != CX_REG)
9106 else if (ix86_save_reg (BX_REG, true))
9108 /* esi is the static chain register. */
9109 else if (!(regparm == 3 && static_chain_p)
9110 && ix86_save_reg (SI_REG, true))
9112 else if (ix86_save_reg (DI_REG, true))
9116 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9121 sr->reg = gen_rtx_REG (Pmode, regno);
9124 rtx insn = emit_insn (gen_push (sr->reg));
9125 RTX_FRAME_RELATED_P (insn) = 1;
9129 /* Release a scratch register obtained from the preceding function. */
9132 release_scratch_register_on_entry (struct scratch_reg *sr)
9136 rtx x, insn = emit_insn (gen_pop (sr->reg));
9138 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9139 RTX_FRAME_RELATED_P (insn) = 1;
9140 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9141 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9142 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9146 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
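/* With the common setting STACK_CHECK_PROBE_INTERVAL_EXP == 12 this is
   4096 bytes, i.e. one probe per page.  */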
9148 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9151 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9153 /* We skip the probe for the first interval + a small dope of 4 words and
9154 probe that many bytes past the specified size to maintain a protection
9155 area at the bottom of the stack. */
9156 const int dope = 4 * UNITS_PER_WORD;
9157 rtx size_rtx = GEN_INT (size);
9159 /* See if we have a constant small number of probes to generate. If so,
9160 that's the easy case. The run-time loop is made up of 11 insns in the
9161 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9162 for n # of intervals. */
9163 if (size <= 5 * PROBE_INTERVAL)
9165 HOST_WIDE_INT i, adjust;
9166 bool first_probe = true;
9168 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9169 values of N from 1 until it exceeds SIZE. If only one probe is
9170 needed, this will not generate any code. Then adjust and probe
9171 to PROBE_INTERVAL + SIZE. */
9172 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9176 adjust = 2 * PROBE_INTERVAL + dope;
9177 first_probe = false;
9180 adjust = PROBE_INTERVAL;
9182 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9183 plus_constant (stack_pointer_rtx, -adjust)));
9184 emit_stack_probe (stack_pointer_rtx);
9188 adjust = size + PROBE_INTERVAL + dope;
9190 adjust = size + PROBE_INTERVAL - i;
9192 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9193 plus_constant (stack_pointer_rtx, -adjust)));
9194 emit_stack_probe (stack_pointer_rtx);
9196 /* Adjust back to account for the additional first interval. */
9197 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9198 plus_constant (stack_pointer_rtx,
9199 PROBE_INTERVAL + dope)));
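/* Worked example for the unrolled case, assuming PROBE_INTERVAL == 4096
   and 64-bit mode (dope == 32): for size == 10000 this emits
     sub $8224, %rsp; probe	(2 * 4096 + 32, the first interval)
     sub $4096, %rsp; probe
     sub $1808, %rsp; probe	(10000 + 4096 - 12288)
     add $4128, %rsp		(give back PROBE_INTERVAL + dope)
   for a net adjustment of exactly 10000 bytes.  */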
9202 /* Otherwise, do the same as above, but in a loop. Note that we must be
9203 extra careful with variables wrapping around because we might be at
9204 the very top (or the very bottom) of the address space and we have
9205 to be able to handle this case properly; in particular, we use an
9206 equality test for the loop condition. */
9209 HOST_WIDE_INT rounded_size;
9210 struct scratch_reg sr;
9212 get_scratch_register_on_entry (&sr);
9215 /* Step 1: round SIZE to the previous multiple of the interval. */
9217 rounded_size = size & -PROBE_INTERVAL;
9220 /* Step 2: compute initial and final value of the loop counter. */
9222 /* SP = SP_0 - (PROBE_INTERVAL + dope). */
9223 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9224 plus_constant (stack_pointer_rtx,
9225 - (PROBE_INTERVAL + dope))));
9227 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9228 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9229 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9230 gen_rtx_PLUS (Pmode, sr.reg,
9231 stack_pointer_rtx)));
9236 while (SP != LAST_ADDR)
9238 SP = SP + PROBE_INTERVAL
9242 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9243 values of N from 1 until it is equal to ROUNDED_SIZE. */
9245 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9248 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9249 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9251 if (size != rounded_size)
9253 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9254 plus_constant (stack_pointer_rtx,
9255 rounded_size - size)));
9256 emit_stack_probe (stack_pointer_rtx);
9259 /* Adjust back to account for the additional first interval. */
9260 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9261 plus_constant (stack_pointer_rtx,
9262 PROBE_INTERVAL + dope)));
9264 release_scratch_register_on_entry (&sr);
9267 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9268 cfun->machine->fs.sp_offset += size;
9270 /* Make sure nothing is scheduled before we are done. */
9271 emit_insn (gen_blockage ());
9274 /* Adjust the stack pointer up to REG while probing it. */
9277 output_adjust_stack_and_probe (rtx reg)
9279 static int labelno = 0;
9280 char loop_lab[32], end_lab[32];
9283 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9284 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9286 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9288 /* Jump to END_LAB if SP == LAST_ADDR. */
9289 xops[0] = stack_pointer_rtx;
9291 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9292 fputs ("\tje\t", asm_out_file);
9293 assemble_name_raw (asm_out_file, end_lab);
9294 fputc ('\n', asm_out_file);
9296 /* SP = SP + PROBE_INTERVAL. */
9297 xops[1] = GEN_INT (PROBE_INTERVAL);
9298 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9301 xops[1] = const0_rtx;
9302 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9304 fprintf (asm_out_file, "\tjmp\t");
9305 assemble_name_raw (asm_out_file, loop_lab);
9306 fputc ('\n', asm_out_file);
9308 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
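/* The emitted loop looks roughly like this in AT&T syntax, assuming
   PROBE_INTERVAL == 4096 and the scratch register in %eax:
     LPSRL0:	cmpl	%eax, %esp
		je	LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	LPSRL0
     LPSRE0:
   where the exact label spelling is target-defined.  */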
9313 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9314 inclusive. These are offsets from the current stack pointer. */
9317 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9319 /* See if we have a constant small number of probes to generate. If so,
9320 that's the easy case. The run-time loop is made up of 7 insns in the
9321 generic case while the compile-time loop is made up of n insns for n #
9323 if (size <= 7 * PROBE_INTERVAL)
9327 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9328 it exceeds SIZE. If only one probe is needed, this will not
9329 generate any code. Then probe at FIRST + SIZE. */
9330 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9331 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9333 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9336 /* Otherwise, do the same as above, but in a loop. Note that we must be
9337 extra careful with variables wrapping around because we might be at
9338 the very top (or the very bottom) of the address space and we have
9339 to be able to handle this case properly; in particular, we use an
9340 equality test for the loop condition. */
9343 HOST_WIDE_INT rounded_size, last;
9344 struct scratch_reg sr;
9346 get_scratch_register_on_entry (&sr);
9349 /* Step 1: round SIZE to the previous multiple of the interval. */
9351 rounded_size = size & -PROBE_INTERVAL;
9354 /* Step 2: compute initial and final value of the loop counter. */
9356 /* TEST_OFFSET = FIRST. */
9357 emit_move_insn (sr.reg, GEN_INT (-first));
9359 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9360 last = first + rounded_size;
9365 while (TEST_ADDR != LAST_ADDR)
9367 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9371 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9372 until it is equal to ROUNDED_SIZE. */
9374 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9377 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9378 that SIZE is equal to ROUNDED_SIZE. */
9380 if (size != rounded_size)
9381 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9384 rounded_size - size));
9386 release_scratch_register_on_entry (&sr);
9389 /* Make sure nothing is scheduled before we are done. */
9390 emit_insn (gen_blockage ());
9393 /* Probe a range of stack addresses from REG to END, inclusive. These are
9394 offsets from the current stack pointer. */
9397 output_probe_stack_range (rtx reg, rtx end)
9399 static int labelno = 0;
9400 char loop_lab[32], end_lab[32];
9403 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9404 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9406 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9408 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9411 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9412 fputs ("\tje\t", asm_out_file);
9413 assemble_name_raw (asm_out_file, end_lab);
9414 fputc ('\n', asm_out_file);
9416 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9417 xops[1] = GEN_INT (PROBE_INTERVAL);
9418 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9420 /* Probe at TEST_ADDR. */
9421 xops[0] = stack_pointer_rtx;
9423 xops[2] = const0_rtx;
9424 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9426 fprintf (asm_out_file, "\tjmp\t");
9427 assemble_name_raw (asm_out_file, loop_lab);
9428 fputc ('\n', asm_out_file);
9430 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9435 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9436 to be generated in the correct form. */
9438 ix86_finalize_stack_realign_flags (void)
9440 /* Check if stack realignment is really needed after reload, and
9441 store the result in cfun. */
9442 unsigned int incoming_stack_boundary
9443 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9444 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9445 unsigned int stack_realign = (incoming_stack_boundary
9446 < (current_function_is_leaf
9447 ? crtl->max_used_stack_slot_alignment
9448 : crtl->stack_alignment_needed));
9450 if (crtl->stack_realign_finalized)
9452 /* After stack_realign_needed is finalized, we can no longer
9454 gcc_assert (crtl->stack_realign_needed == stack_realign);
9458 crtl->stack_realign_needed = stack_realign;
9459 crtl->stack_realign_finalized = true;
9463 /* Expand the prologue into a bunch of separate insns. */
9466 ix86_expand_prologue (void)
9468 struct machine_function *m = cfun->machine;
9471 struct ix86_frame frame;
9472 HOST_WIDE_INT allocate;
9473 bool int_registers_saved;
9475 ix86_finalize_stack_realign_flags ();
9477 /* DRAP should not coexist with stack_realign_fp */
9478 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9480 memset (&m->fs, 0, sizeof (m->fs));
9482 /* Initialize CFA state for before the prologue. */
9483 m->fs.cfa_reg = stack_pointer_rtx;
9484 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9486 /* Track SP offset to the CFA. We continue tracking this after we've
9487 swapped the CFA register away from SP. In the case of re-alignment
9488 this is fudged; we're interested in offsets within the local frame. */
9489 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9490 m->fs.sp_valid = true;
9492 ix86_compute_frame_layout (&frame);
9494 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9496 /* We should have already generated an error for any use of
9497 ms_hook on a nested function. */
9498 gcc_checking_assert (!ix86_static_chain_on_stack);
9500 /* Check if profiling is active and we shall use the profiling-before-
9501 prologue variant. If so, sorry. */
9502 if (crtl->profile && flag_fentry != 0)
9503 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9505 /* In ix86_asm_output_function_label we emitted:
9506 8b ff movl.s %edi,%edi
9508 8b ec movl.s %esp,%ebp
9510 This matches the hookable function prologue in Win32 API
9511 functions in Microsoft Windows XP Service Pack 2 and newer.
9512 Wine uses this to enable Windows apps to hook the Win32 API
9513 functions provided by Wine.
9515 What that means is that we've already set up the frame pointer. */
9517 if (frame_pointer_needed
9518 && !(crtl->drap_reg && crtl->stack_realign_needed))
9522 /* We've decided to use the frame pointer already set up.
9523 Describe this to the unwinder by pretending that both
9524 push and mov insns happen right here.
9526 Putting the unwind info here at the end of the ms_hook
9527 is done so that we can make absolutely certain we get
9528 the required byte sequence at the start of the function,
9529 rather than relying on an assembler that can produce
9530 the exact encoding required.
9532 However it does mean (in the unpatched case) that we have
9533 a 1 insn window where the asynchronous unwind info is
9534 incorrect. However, if we placed the unwind info at
9535 its correct location we would have incorrect unwind info
9536 in the patched case. Which is probably all moot since
9537 I don't expect Wine generates dwarf2 unwind info for the
9538 system libraries that use this feature. */
9540 insn = emit_insn (gen_blockage ());
9542 push = gen_push (hard_frame_pointer_rtx);
9543 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9545 RTX_FRAME_RELATED_P (push) = 1;
9546 RTX_FRAME_RELATED_P (mov) = 1;
9548 RTX_FRAME_RELATED_P (insn) = 1;
9549 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9550 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9552 /* Note that gen_push incremented m->fs.cfa_offset, even
9553 though we didn't emit the push insn here. */
9554 m->fs.cfa_reg = hard_frame_pointer_rtx;
9555 m->fs.fp_offset = m->fs.cfa_offset;
9556 m->fs.fp_valid = true;
9560 /* The frame pointer is not needed so pop %ebp again.
9561 This leaves us with a pristine state. */
9562 emit_insn (gen_pop (hard_frame_pointer_rtx));
9566 /* The first insn of a function that accepts its static chain on the
9567 stack is to push the register that would be filled in by a direct
9568 call. This insn will be skipped by the trampoline. */
9569 else if (ix86_static_chain_on_stack)
9571 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9572 emit_insn (gen_blockage ());
9574 /* We don't want to interpret this push insn as a register save,
9575 only as a stack adjustment. The real copy of the register as
9576 a save will be done later, if needed. */
9577 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9578 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9579 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9580 RTX_FRAME_RELATED_P (insn) = 1;
9583 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9584 DRAP is needed and stack realignment is really needed after reload. */
9585 if (stack_realign_drap)
9587 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9589 /* Only need to push parameter pointer reg if it is caller saved. */
9590 if (!call_used_regs[REGNO (crtl->drap_reg)])
9592 /* Push arg pointer reg */
9593 insn = emit_insn (gen_push (crtl->drap_reg));
9594 RTX_FRAME_RELATED_P (insn) = 1;
9597 /* Grab the argument pointer. */
9598 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9599 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9600 RTX_FRAME_RELATED_P (insn) = 1;
9601 m->fs.cfa_reg = crtl->drap_reg;
9602 m->fs.cfa_offset = 0;
9604 /* Align the stack. */
9605 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9607 GEN_INT (-align_bytes)));
9608 RTX_FRAME_RELATED_P (insn) = 1;
9610 /* Replicate the return address on the stack so that return
9611 address can be reached via (argp - 1) slot. This is needed
9612 to implement macro RETURN_ADDR_RTX and intrinsic function
9613 expand_builtin_return_addr etc. */
9614 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9615 t = gen_frame_mem (Pmode, t);
9616 insn = emit_insn (gen_push (t));
9617 RTX_FRAME_RELATED_P (insn) = 1;
9619 /* For the purposes of frame and register save area addressing,
9620 we've started over with a new frame. */
9621 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9622 m->fs.realigned = true;
9625 if (frame_pointer_needed && !m->fs.fp_valid)
9627 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9628 slower on all targets. Also sdb doesn't like it. */
9629 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9630 RTX_FRAME_RELATED_P (insn) = 1;
9632 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9633 RTX_FRAME_RELATED_P (insn) = 1;
9635 if (m->fs.cfa_reg == stack_pointer_rtx)
9636 m->fs.cfa_reg = hard_frame_pointer_rtx;
9637 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9638 m->fs.fp_offset = m->fs.sp_offset;
9639 m->fs.fp_valid = true;
9642 int_registers_saved = (frame.nregs == 0);
9644 if (!int_registers_saved)
9646 /* If saving registers via PUSH, do so now. */
9647 if (!frame.save_regs_using_mov)
9649 ix86_emit_save_regs ();
9650 int_registers_saved = true;
9651 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9654 /* When using the red zone we may start saving registers before allocating
9655 the stack frame, saving one cycle of the prologue. However, avoid
9656 doing this if we have to probe the stack; at least on x86_64 the
9657 stack probe can turn into a call that clobbers a red zone location. */
9658 else if (ix86_using_red_zone ()
9659 && (! TARGET_STACK_PROBE
9660 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9662 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9663 int_registers_saved = true;
9667 if (stack_realign_fp)
9669 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9670 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9672 /* The computation of the size of the re-aligned stack frame means
9673 that we must allocate the size of the register save area before
9674 performing the actual alignment. Otherwise we cannot guarantee
9675 that there's enough storage above the realignment point. */
9676 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9677 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9678 GEN_INT (m->fs.sp_offset
9679 - frame.sse_reg_save_offset),
9682 /* Align the stack. */
9683 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9685 GEN_INT (-align_bytes)));
9687 /* For the purposes of register save area addressing, the stack
9688 pointer is no longer valid. As for the value of sp_offset,
9689 see ix86_compute_frame_layout, which we need to match in order
9690 to pass verification of stack_pointer_offset at the end. */
9691 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9692 m->fs.sp_valid = false;
9695 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9697 if (flag_stack_usage)
9699 /* We start to count from ARG_POINTER. */
9700 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9702 /* If it was realigned, take into account the fake frame. */
9703 if (stack_realign_drap)
9705 if (ix86_static_chain_on_stack)
9706 stack_size += UNITS_PER_WORD;
9708 if (!call_used_regs[REGNO (crtl->drap_reg)])
9709 stack_size += UNITS_PER_WORD;
9711 /* This over-estimates by 1 minimal-stack-alignment-unit but
9712 mitigates that by counting in the new return address slot. */
9713 current_function_dynamic_stack_size
9714 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9717 current_function_static_stack_size = stack_size;
9720 /* The stack has already been decremented by the instruction calling us
9721 so we need to probe unconditionally to preserve the protection area. */
9722 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9724 /* We expect the registers to be saved when probes are used. */
9725 gcc_assert (int_registers_saved);
9727 if (STACK_CHECK_MOVING_SP)
9729 ix86_adjust_stack_and_probe (allocate);
9734 HOST_WIDE_INT size = allocate;
9736 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9737 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9739 if (TARGET_STACK_PROBE)
9740 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9742 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9748 else if (!ix86_target_stack_probe ()
9749 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9751 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9752 GEN_INT (-allocate), -1,
9753 m->fs.cfa_reg == stack_pointer_rtx);
9757 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9759 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
9761 bool eax_live = false;
9762 bool r10_live = false;
9765 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9766 if (!TARGET_64BIT_MS_ABI)
9767 eax_live = ix86_eax_live_at_start_p ();
9771 emit_insn (gen_push (eax));
9772 allocate -= UNITS_PER_WORD;
9776 r10 = gen_rtx_REG (Pmode, R10_REG);
9777 emit_insn (gen_push (r10));
9778 allocate -= UNITS_PER_WORD;
9781 emit_move_insn (eax, GEN_INT (allocate));
9782 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9784 /* Use the fact that AX still contains ALLOCATE. */
9785 adjust_stack_insn = (TARGET_64BIT
9786 ? gen_pro_epilogue_adjust_stack_di_sub
9787 : gen_pro_epilogue_adjust_stack_si_sub);
9789 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
9790 stack_pointer_rtx, eax));
9792 if (m->fs.cfa_reg == stack_pointer_rtx)
9794 m->fs.cfa_offset += allocate;
9796 RTX_FRAME_RELATED_P (insn) = 1;
9797 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9798 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9799 plus_constant (stack_pointer_rtx,
9802 m->fs.sp_offset += allocate;
9804 if (r10_live && eax_live)
9806 t = choose_baseaddr (m->fs.sp_offset - allocate);
9807 emit_move_insn (r10, gen_frame_mem (Pmode, t));
9808 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
9809 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9811 else if (eax_live || r10_live)
9813 t = choose_baseaddr (m->fs.sp_offset - allocate);
9814 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
9817 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9819 if (!int_registers_saved)
9820 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9822 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9824 pic_reg_used = false;
9825 if (pic_offset_table_rtx
9826 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9829 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9831 if (alt_pic_reg_used != INVALID_REGNUM)
9832 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9834 pic_reg_used = true;
9841 if (ix86_cmodel == CM_LARGE_PIC)
9843 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9844 rtx label = gen_label_rtx ();
9846 LABEL_PRESERVE_P (label) = 1;
9847 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9848 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9849 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9850 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9851 pic_offset_table_rtx, tmp_reg));
9854 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9857 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9860 /* In the pic_reg_used case, make sure that the got load isn't deleted
9861 when mcount needs it. Blockage to avoid call movement across mcount
9862 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9864 if (crtl->profile && !flag_fentry && pic_reg_used)
9865 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9867 if (crtl->drap_reg && !crtl->stack_realign_needed)
9869 /* vDRAP is set up, but after reload it turns out stack realignment
9870 isn't necessary; here we emit the prologue to set up DRAP
9871 without the stack realignment adjustment. */
9872 t = choose_baseaddr (0);
9873 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9876 /* Prevent instructions from being scheduled into register save push
9877 sequence when access to the redzone area is done through frame pointer.
9878 The offset between the frame pointer and the stack pointer is calculated
9879 relative to the value of the stack pointer at the end of the function
9880 prologue, and moving instructions that access redzone area via frame
9881 pointer inside push sequence violates this assumption. */
9882 if (frame_pointer_needed && frame.red_zone_size)
9883 emit_insn (gen_memory_blockage ());
9885 /* Emit cld instruction if stringops are used in the function. */
9886 if (TARGET_CLD && ix86_current_function_needs_cld)
9887 emit_insn (gen_cld ());
9890 /* Emit code to restore REG using a POP insn. */
9893 ix86_emit_restore_reg_using_pop (rtx reg)
9895 struct machine_function *m = cfun->machine;
9896 rtx insn = emit_insn (gen_pop (reg));
9898 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9899 m->fs.sp_offset -= UNITS_PER_WORD;
9901 if (m->fs.cfa_reg == crtl->drap_reg
9902 && REGNO (reg) == REGNO (crtl->drap_reg))
9904 /* Previously we'd represented the CFA as an expression
9905 like *(%ebp - 8). We've just popped that value from
9906 the stack, which means we need to reset the CFA to
9907 the drap register. This will remain until we restore
9908 the stack pointer. */
9909 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9910 RTX_FRAME_RELATED_P (insn) = 1;
9912 /* This means that the DRAP register is valid for addressing too. */
9913 m->fs.drap_valid = true;
9917 if (m->fs.cfa_reg == stack_pointer_rtx)
9919 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9920 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9921 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9922 RTX_FRAME_RELATED_P (insn) = 1;
9924 m->fs.cfa_offset -= UNITS_PER_WORD;
9927 /* When the frame pointer is the CFA, and we pop it, we are
9928 swapping back to the stack pointer as the CFA. This happens
9929 for stack frames that don't allocate other data, so we assume
9930 the stack pointer is now pointing at the return address, i.e.
9931 the function entry state, which makes the offset be 1 word. */
9932 if (reg == hard_frame_pointer_rtx)
9934 m->fs.fp_valid = false;
9935 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9937 m->fs.cfa_reg = stack_pointer_rtx;
9938 m->fs.cfa_offset -= UNITS_PER_WORD;
9940 add_reg_note (insn, REG_CFA_DEF_CFA,
9941 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9942 GEN_INT (m->fs.cfa_offset)));
9943 RTX_FRAME_RELATED_P (insn) = 1;
9948 /* Emit code to restore saved registers using POP insns. */
9951 ix86_emit_restore_regs_using_pop (void)
9955 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9956 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9957 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9960 /* Emit code and notes for the LEAVE instruction. */
9963 ix86_emit_leave (void)
9965 struct machine_function *m = cfun->machine;
9966 rtx insn = emit_insn (ix86_gen_leave ());
9968 ix86_add_queued_cfa_restore_notes (insn);
9970 gcc_assert (m->fs.fp_valid);
9971 m->fs.sp_valid = true;
9972 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9973 m->fs.fp_valid = false;
9975 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9977 m->fs.cfa_reg = stack_pointer_rtx;
9978 m->fs.cfa_offset = m->fs.sp_offset;
9980 add_reg_note (insn, REG_CFA_DEF_CFA,
9981 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9982 RTX_FRAME_RELATED_P (insn) = 1;
9983 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
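/* leave is equivalent to "movl %ebp, %esp; popl %ebp", which is why the
   state update above marks the stack pointer valid again at
   fp_offset - UNITS_PER_WORD and invalidates the frame pointer.  */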
9988 /* Emit code to restore saved registers using MOV insns.
9989 First register is restored from CFA - CFA_OFFSET. */
9991 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9992 int maybe_eh_return)
9994 struct machine_function *m = cfun->machine;
9997 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9998 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10000 rtx reg = gen_rtx_REG (Pmode, regno);
10003 mem = choose_baseaddr (cfa_offset);
10004 mem = gen_frame_mem (Pmode, mem);
10005 insn = emit_move_insn (reg, mem);
10007 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10009 /* Previously we'd represented the CFA as an expression
10010 like *(%ebp - 8). We've just popped that value from
10011 the stack, which means we need to reset the CFA to
10012 the drap register. This will remain until we restore
10013 the stack pointer. */
10014 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10015 RTX_FRAME_RELATED_P (insn) = 1;
10017 /* This means that the DRAP register is valid for addressing. */
10018 m->fs.drap_valid = true;
10021 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10023 cfa_offset -= UNITS_PER_WORD;
10027 /* Emit code to restore saved registers using MOV insns.
10028 First register is restored from CFA - CFA_OFFSET. */
10030 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10031 int maybe_eh_return)
10033 unsigned int regno;
10035 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10036 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10038 rtx reg = gen_rtx_REG (V4SFmode, regno);
10041 mem = choose_baseaddr (cfa_offset);
10042 mem = gen_rtx_MEM (V4SFmode, mem);
10043 set_mem_align (mem, 128);
10044 emit_move_insn (reg, mem);
10046 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10052 /* Restore function stack, frame, and registers. */
10055 ix86_expand_epilogue (int style)
10057 struct machine_function *m = cfun->machine;
10058 struct machine_frame_state frame_state_save = m->fs;
10059 struct ix86_frame frame;
10060 bool restore_regs_via_mov;
10063 ix86_finalize_stack_realign_flags ();
10064 ix86_compute_frame_layout (&frame);
10066 m->fs.sp_valid = (!frame_pointer_needed
10067 || (current_function_sp_is_unchanging
10068 && !stack_realign_fp));
10069 gcc_assert (!m->fs.sp_valid
10070 || m->fs.sp_offset == frame.stack_pointer_offset);
10072 /* The FP must be valid if the frame pointer is present. */
10073 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10074 gcc_assert (!m->fs.fp_valid
10075 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10077 /* We must have *some* valid pointer to the stack frame. */
10078 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10080 /* The DRAP is never valid at this point. */
10081 gcc_assert (!m->fs.drap_valid);
10083 /* See the comment about red zone and frame
10084 pointer usage in ix86_expand_prologue. */
10085 if (frame_pointer_needed && frame.red_zone_size)
10086 emit_insn (gen_memory_blockage ());
10088 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10089 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10091 /* Determine the CFA offset of the end of the red-zone. */
10092 m->fs.red_zone_offset = 0;
10093 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10095 /* The red-zone begins below the return address. */
10096 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10098 /* When the register save area is in the aligned portion of
10099 the stack, determine the maximum runtime displacement that
10100 matches up with the aligned frame. */
10101 if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  - UNITS_PER_WORD);
    }
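
  /* A worked example: on x86-64, RED_ZONE_SIZE is 128 and UNITS_PER_WORD
     is 8, so red_zone_offset starts at 136; if a DRAP realigned the stack
     to 32 bytes, it is reduced by 32 - 8 = 24, giving 112.  */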
10106 /* Special care must be taken for the normal return case of a function
10107 using eh_return: the eax and edx registers are marked as saved, but
10108 not restored along this path. Adjust the save location to match. */
10109 if (crtl->calls_eh_return && style != 2)
10110 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
10115 if (!m->fs.sp_valid && frame.nregs <= 1)
10116 restore_regs_via_mov = true;
10117 /* EH_RETURN requires the use of moves to function properly. */
10118 else if (crtl->calls_eh_return)
10119 restore_regs_via_mov = true;
10120 else if (TARGET_EPILOGUE_USING_MOVE
10121 && cfun->machine->use_fast_prologue_epilogue
10122 && (frame.nregs > 1
10123 || m->fs.sp_offset != frame.reg_save_offset))
10124 restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
10129 else if (frame_pointer_needed
10130 && TARGET_USE_LEAVE
10131 && cfun->machine->use_fast_prologue_epilogue
10132 && frame.nregs == 1)
10133 restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
10137 if (restore_regs_via_mov || frame.nsseregs)
10139 /* Ensure that the entire register save area is addressable via
10140 the stack pointer, if we will restore via sp. */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
10143 && !(m->fs.fp_valid || m->fs.drap_valid)
10144 && (frame.nsseregs + frame.nregs) != 0)
10146 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10147 GEN_INT (m->fs.sp_offset
10148 - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }
10154 /* If there are any SSE registers to restore, then we have to do it
10155 via moves, since there's obviously no pop for SSE regs. */
10156 if (frame.nsseregs)
10157 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10160 if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10167 /* eh_return epilogues need %ecx added to the stack pointer. */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10172 /* Stack align doesn't work with eh_return. */
10173 gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
10175 gcc_assert (!ix86_static_chain_on_stack);
10177 if (frame_pointer_needed)
10179 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10180 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10181 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10183 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10184 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10186 /* Note that we use SA as a temporary CFA, as the return
10187 address is at the proper place relative to it. We
10188 pretend this happens at the FP restore insn because
10189 prior to this insn the FP would be stored at the wrong
10190 offset relative to SA, and after this insn we have no
10191 other reasonable register to use for the CFA. We don't
10192 bother resetting the CFA to the SP for the duration of
10193 the return insn. */
10194 add_reg_note (insn, REG_CFA_DEF_CFA,
10195 plus_constant (sa, UNITS_PER_WORD));
10196 ix86_add_queued_cfa_restore_notes (insn);
10197 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10198 RTX_FRAME_RELATED_P (insn) = 1;
10200 m->fs.cfa_reg = sa;
10201 m->fs.cfa_offset = UNITS_PER_WORD;
10202 m->fs.fp_valid = false;
10204 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10205 const0_rtx, style, false);
10209 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10210 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10211 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10212 ix86_add_queued_cfa_restore_notes (insn);
10214 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10215 if (m->fs.cfa_offset != UNITS_PER_WORD)
10217 m->fs.cfa_offset = UNITS_PER_WORD;
10218 add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
10224 m->fs.sp_offset = UNITS_PER_WORD;
10225 m->fs.sp_valid = true;
10230 /* First step is to deallocate the stack frame so that we can
10231 pop the registers. */
10232 if (!m->fs.sp_valid)
10234 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10235 GEN_INT (m->fs.fp_offset
10236 - frame.reg_save_offset),
10239 else if (m->fs.sp_offset != frame.reg_save_offset)
10241 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10242 GEN_INT (m->fs.sp_offset
10243 - frame.reg_save_offset),
10245 m->fs.cfa_reg == stack_pointer_rtx);
10248 ix86_emit_restore_regs_using_pop ();
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
10253 if (m->fs.fp_valid)
10255 /* If the stack pointer is valid and pointing at the frame
10256 pointer store address, then we only need a pop. */
10257 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10258 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10259 /* Leave results in shorter dependency chains on CPUs that are
10260 able to grok it fast. */
10261 else if (TARGET_USE_LEAVE
10262 || optimize_function_for_size_p (cfun)
10263 || !cfun->machine->use_fast_prologue_epilogue)
10264 ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
10268 hard_frame_pointer_rtx,
10269 const0_rtx, style, !using_drap);
10270 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
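
	  /* The two alternatives above are equivalent; for illustration:

		leave		vs.	movl	%ebp, %esp
					popl	%ebp

	     LEAVE is smaller, while the explicit pair gives shorter
	     dependency chains on CPUs that do not execute LEAVE
	     quickly.  */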
	}
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;
10279 gcc_assert (stack_realign_drap);
10281 if (ix86_static_chain_on_stack)
10282 param_ptr_offset += UNITS_PER_WORD;
10283 if (!call_used_regs[REGNO (crtl->drap_reg)])
10284 param_ptr_offset += UNITS_PER_WORD;
10286 insn = emit_insn (gen_rtx_SET
10287 (VOIDmode, stack_pointer_rtx,
10288 gen_rtx_PLUS (Pmode,
					   crtl->drap_reg,
					   GEN_INT (-param_ptr_offset))));
10291 m->fs.cfa_reg = stack_pointer_rtx;
10292 m->fs.cfa_offset = param_ptr_offset;
10293 m->fs.sp_offset = param_ptr_offset;
10294 m->fs.realigned = false;
10296 add_reg_note (insn, REG_CFA_DEF_CFA,
10297 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10298 GEN_INT (param_ptr_offset)));
10299 RTX_FRAME_RELATED_P (insn) = 1;
10301 if (!call_used_regs[REGNO (crtl->drap_reg)])
10302 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10305 /* At this point the stack pointer must be valid, and we must have
10306 restored all of the registers. We may not have deallocated the
10307 entire stack frame. We've delayed this until now because it may
10308 be possible to merge the local stack deallocation with the
10309 deallocation forced by ix86_static_chain_on_stack. */
10310 gcc_assert (m->fs.sp_valid);
10311 gcc_assert (!m->fs.fp_valid);
10312 gcc_assert (!m->fs.realigned);
10313 if (m->fs.sp_offset != UNITS_PER_WORD)
10315 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
			       style, true);
10320 /* Sibcall epilogues don't want a return instruction. */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }
10327 if (crtl->args.pops_args && crtl->args.size)
10329 rtx popc = GEN_INT (crtl->args.pops_args);
10331 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10332 address, do explicit add, and jump indirectly to the caller. */
10334 if (crtl->args.pops_args >= 65536)
10336 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10339 /* There is no "pascal" calling convention in any 64bit ABI. */
10340 gcc_assert (!TARGET_64BIT);
10342 insn = emit_insn (gen_pop (ecx));
10343 m->fs.cfa_offset -= UNITS_PER_WORD;
10344 m->fs.sp_offset -= UNITS_PER_WORD;
10346 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10347 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10348 add_reg_note (insn, REG_CFA_REGISTER,
10349 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10350 RTX_FRAME_RELATED_P (insn) = 1;
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
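
  /* For illustration: a function popping 12 bytes of arguments simply
     ends with "ret $12".  The >= 65536 path above exists because the
     immediate operand of RET is only 16 bits wide, hence the
     pop/add/indirect-jump sequence for larger argument blocks.  */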
10362 /* Restore the state back to the state from the prologue,
10363 so that it's correct for the next epilogue. */
10364 m->fs = frame_state_save;
10367 /* Reset from the function's potential modifications. */
static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
10373 if (pic_offset_table_rtx)
10374 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
10377 it looks like we might want one, insert a NOP. */
  {
    rtx insn = get_last_insn ();
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      insn = PREV_INSN (insn);
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
  }
#endif
}
10394 /* Return a scratch register to use in the split stack prologue. The
10395 split stack prologue is used for -fsplit-stack. It is the first
10396 instructions in the function, even before the regular prologue.
10397 The scratch register can be any caller-saved register which is not
10398 used for parameters or for the static chain. */
10400 static unsigned int
10401 split_stack_prologue_scratch_regno (void)
10410 is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
10413 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10417 if (DECL_STATIC_CHAIN (cfun->decl))
10419 sorry ("-fsplit-stack does not support fastcall with "
10420 "nested function");
10421 return INVALID_REGNUM;
10425 else if (regparm < 3)
10427 if (!DECL_STATIC_CHAIN (cfun->decl))
	      sorry ("-fsplit-stack does not support 2 register "
		     "parameters for a nested function");
10435 return INVALID_REGNUM;
10442 /* FIXME: We could make this work by pushing a register
10443 around the addition and comparison. */
10444 sorry ("-fsplit-stack does not support 3 register parameters");
10445 return INVALID_REGNUM;
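
/* In brief (a sketch of the cases handled above): 64-bit code uses
   %r11; 32-bit code uses %eax for fastcall functions, %edx for nested
   functions taking at most one register parameter (the static chain
   occupies %ecx), and %ecx otherwise.  */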
10450 /* A SYMBOL_REF for the function which allocates new stackspace for
10453 static GTY(()) rtx split_stack_fn;
10455 /* Handle -fsplit-stack. These are the first instructions in the
10456 function, even before the regular prologue. */
10459 ix86_expand_split_stack_prologue (void)
10461 struct ix86_frame frame;
10462 HOST_WIDE_INT allocate;
10464 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10465 rtx scratch_reg = NULL_RTX;
10466 rtx varargs_label = NULL_RTX;
10468 gcc_assert (flag_split_stack && reload_completed);
10470 ix86_finalize_stack_realign_flags ();
10471 ix86_compute_frame_layout (&frame);
10472 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10474 /* This is the label we will branch to if we have enough stack
10475 space. We expect the basic block reordering pass to reverse this
10476 branch if optimizing, so that we branch in the unlikely case. */
10477 label = gen_label_rtx ();
10479 /* We need to compare the stack pointer minus the frame size with
10480 the stack boundary in the TCB. The stack boundary always gives
10481 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10482 can compare directly. Otherwise we need to do an addition. */
10484 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10485 UNSPEC_STACK_CHECK);
10486 limit = gen_rtx_CONST (Pmode, limit);
10487 limit = gen_rtx_MEM (Pmode, limit);
10488 if (allocate < SPLIT_STACK_AVAILABLE)
10489 current = stack_pointer_rtx;
10492 unsigned int scratch_regno;
10495 /* We need a scratch register to hold the stack pointer minus
10496 the required frame size. Since this is the very start of the
10497 function, the scratch register can be any caller-saved
10498 register which is not used for parameters. */
10499 offset = GEN_INT (- allocate);
10500 scratch_regno = split_stack_prologue_scratch_regno ();
10501 if (scratch_regno == INVALID_REGNUM)
10503 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10504 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10506 /* We don't use ix86_gen_add3 in this case because it will
10507 want to split to lea, but when not optimizing the insn
10508 will not be split after this point. */
10509 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10510 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10515 emit_move_insn (scratch_reg, offset);
10516 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10517 stack_pointer_rtx));
10519 current = scratch_reg;
10522 ix86_expand_branch (GEU, current, limit, label);
10523 jump_insn = get_last_insn ();
10524 JUMP_LABEL (jump_insn) = label;
10526 /* Mark the jump as very likely to be taken. */
10527 add_reg_note (jump_insn, REG_BR_PROB,
10528 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
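
  /* For illustration, the emitted comparison is roughly (a 64-bit
     sketch; the TCB slot offset is fixed by libgcc, e.g. %fs:0x70 on
     GNU/Linux):

	cmpq	%fs:0x70, %rsp		# or lea/cmp via a scratch reg
	jae	.Lstack_ok		# enough stack, skip __morestack  */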
10530 /* Get more stack space. We pass in the desired stack space and the
10531 size of the arguments to copy to the new stack. In 32-bit mode
10532 we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
10535 allocate_rtx = GEN_INT (allocate);
10536 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10537 call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg;

      reg = gen_rtx_REG (Pmode, R10_REG);
10544 /* If this function uses a static chain, it will be in %r10.
10545 Preserve it across the call to __morestack. */
10546 if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (Pmode, AX_REG);
10551 emit_move_insn (rax, reg);
10552 use_reg (&call_fusage, rax);
10555 emit_move_insn (reg, allocate_rtx);
10556 use_reg (&call_fusage, reg);
10557 reg = gen_rtx_REG (Pmode, R11_REG);
10558 emit_move_insn (reg, GEN_INT (args_size));
10559 use_reg (&call_fusage, reg);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
10566 if (split_stack_fn == NULL_RTX)
10567 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10568 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10569 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10571 add_function_usage_to (call_insn, call_fusage);
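
  /* A 64-bit call sequence therefore looks roughly like this (a
     sketch; the %r10-to-%rax save only happens for nested functions):

	movq	%r10, %rax
	movq	$frame_size, %r10
	movq	$args_size, %r11
	call	__morestack
	ret				# see libgcc's morestack.S  */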
10573 /* In order to make call/return prediction work right, we now need
10574 to execute a return instruction. See
10575 libgcc/config/i386/morestack.S for the details on how this works.
10577 For flow purposes gcc must not see this as a return
10578 instruction--we need control flow to continue at the subsequent
10579 label. Therefore, we use an unspec. */
10580 gcc_assert (crtl->args.pops_args < 65536);
10581 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
10585 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10586 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10587 gen_rtx_REG (Pmode, AX_REG));
10589 /* If this function calls va_start, we need to store a pointer to
10590 the arguments on the old stack, because they may not have been
10591 all copied to the new stack. At this point the old stack can be
10592 found at the frame pointer value used by __morestack, because
10593 __morestack has set that up before calling back to us. Here we
10594 store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
10597 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10599 unsigned int scratch_regno;
10603 scratch_regno = split_stack_prologue_scratch_regno ();
10604 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10605 frame_reg = gen_rtx_REG (Pmode, BP_REG);
      /* 64-bit:
	 fp -> old fp
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
10624 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10625 gen_rtx_PLUS (Pmode, frame_reg,
10626 GEN_INT (words * UNITS_PER_WORD))));
10628 varargs_label = gen_label_rtx ();
10629 emit_jump_insn (gen_jump (varargs_label));
10630 JUMP_LABEL (get_last_insn ()) = varargs_label;
10635 emit_label (label);
10636 LABEL_NUSES (label) = 1;
10638 /* If this function calls va_start, we now have to set the scratch
10639 register for the case where we do not call __morestack. In this
10640 case we need to set it based on the stack pointer. */
10641 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10643 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10644 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10645 GEN_INT (UNITS_PER_WORD))));
10647 emit_label (varargs_label);
10648 LABEL_NUSES (varargs_label) = 1;
10652 /* We may have to tell the dataflow pass that the split stack prologue
10653 is initializing a scratch register. */
10656 ix86_live_on_entry (bitmap regs)
10658 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10660 gcc_assert (flag_split_stack);
10661 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10665 /* Extract the parts of an RTL expression that is a valid memory address
10666 for an instruction. Return 0 if the structure of the address is
10667 grossly off. Return -1 if the address contains ASHIFT, so it is not
10668 strictly valid, but still used for computing length of lea instruction. */
int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
10673 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10674 rtx base_reg, index_reg;
10675 HOST_WIDE_INT scale = 1;
10676 rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;
10681 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10683 else if (GET_CODE (addr) == PLUS)
10685 rtx addends[4], op;
10693 addends[n++] = XEXP (op, 1);
10696 while (GET_CODE (op) == PLUS);
10701 for (i = n; i >= 0; --i)
10704 switch (GET_CODE (op))
10709 index = XEXP (op, 0);
10710 scale_rtx = XEXP (op, 1);
10716 index = XEXP (op, 0);
10717 tmp = XEXP (op, 1);
10718 if (!CONST_INT_P (tmp))
10720 scale = INTVAL (tmp);
10721 if ((unsigned HOST_WIDE_INT) scale > 3)
10723 scale = 1 << scale;
10727 if (XINT (op, 1) == UNSPEC_TP
10728 && TARGET_TLS_DIRECT_SEG_REFS
10729 && seg == SEG_DEFAULT)
10730 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10759 else if (GET_CODE (addr) == MULT)
10761 index = XEXP (addr, 0); /* index*scale */
10762 scale_rtx = XEXP (addr, 1);
10764 else if (GET_CODE (addr) == ASHIFT)
10766 /* We're called for lea too, which implements ashift on occasion. */
10767 index = XEXP (addr, 0);
10768 tmp = XEXP (addr, 1);
10769 if (!CONST_INT_P (tmp))
10771 scale = INTVAL (tmp);
10772 if ((unsigned HOST_WIDE_INT) scale > 3)
10774 scale = 1 << scale;
10778 disp = addr; /* displacement */
10780 /* Extract the integral value of scale. */
10783 if (!CONST_INT_P (scale_rtx))
10785 scale = INTVAL (scale_rtx);
10788 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10789 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10791 /* Avoid useless 0 displacement. */
10792 if (disp == const0_rtx && (base || index))
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
10796 if (base_reg && index_reg && scale == 1
10797 && (index_reg == arg_pointer_rtx
10798 || index_reg == frame_pointer_rtx
10799 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10802 tmp = base, base = index, index = tmp;
10803 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
10810 && (base_reg == hard_frame_pointer_rtx
10811 || base_reg == frame_pointer_rtx
10812 || base_reg == arg_pointer_rtx
10813 || (REG_P (base_reg)
10814 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;
10818 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10819 Avoid this by transforming to [%esi+0].
10820 Reload calls address legitimization without cfun defined, so we need
10821 to test cfun for being non-NULL. */
10822 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10823 && base_reg && !index_reg && !disp
10824 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10827 /* Special case: encode reg+reg instead of reg*2. */
10828 if (!base && index && scale == 2)
10829 base = index, base_reg = index_reg, scale = 1;
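  /* E.g. (%eax,%eax) can be encoded directly, whereas (,%eax,2) would
     force a 32-bit zero displacement into the encoding, since a scaled
     index without a base requires an explicit disp32.  */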
10831 /* Special case: scaling cannot be encoded without base or displacement. */
  if (!base && !disp && index && scale != 1)
    retval = -1;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
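
/* A worked example: the address 12(%ebx,%eax,4), i.e.
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into base == %ebx, index == %eax, scale == 4 and
   disp == (const_int 12), with the default segment.  */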
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
10850 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10852 struct ix86_address parts;
10854 int ok = ix86_decompose_address (x, &parts);
10858 if (parts.base && GET_CODE (parts.base) == SUBREG)
10859 parts.base = SUBREG_REG (parts.base);
10860 if (parts.index && GET_CODE (parts.index) == SUBREG)
10861 parts.index = SUBREG_REG (parts.index);
10863 /* Attempt to minimize number of registers in the address. */
10865 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10867 && (!REG_P (parts.index)
10868 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10872 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10874 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10875 && parts.base != parts.index)
  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
  if (TARGET_K6
10894 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10895 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
10907 darwin_local_data_pic (rtx disp)
10909 return (GET_CODE (disp) == UNSPEC
10910 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10913 /* Determine if a given RTX is a valid constant. We already know this
10914 satisfies CONSTANT_P. */
10917 legitimate_constant_p (rtx x)
10919 switch (GET_CODE (x))
10924 if (GET_CODE (x) == PLUS)
10926 if (!CONST_INT_P (XEXP (x, 1)))
10931 if (TARGET_MACHO && darwin_local_data_pic (x))
10934 /* Only some unspecs are valid as "constants". */
10935 if (GET_CODE (x) == UNSPEC)
10936 switch (XINT (x, 1))
10939 case UNSPEC_GOTOFF:
10940 case UNSPEC_PLTOFF:
10941 return TARGET_64BIT;
10943 case UNSPEC_NTPOFF:
10944 x = XVECEXP (x, 0, 0);
10945 return (GET_CODE (x) == SYMBOL_REF
10946 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10947 case UNSPEC_DTPOFF:
10948 x = XVECEXP (x, 0, 0);
10949 return (GET_CODE (x) == SYMBOL_REF
10950 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10955 /* We must have drilled down to a symbol. */
10956 if (GET_CODE (x) == LABEL_REF)
10958 if (GET_CODE (x) != SYMBOL_REF)
10963 /* TLS symbols are never valid. */
10964 if (SYMBOL_REF_TLS_MODEL (x))
10967 /* DLLIMPORT symbols are never valid. */
10968 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10969 && SYMBOL_REF_DLLIMPORT_P (x))
10974 if (GET_MODE (x) == TImode
10975 && x != CONST0_RTX (TImode)
10981 if (!standard_sse_constant_p (x))
10988 /* Otherwise we handle everything else in the move patterns. */
10992 /* Determine if it's legal to put X into the constant pool. This
10993 is not possible for the address of thread-local symbols, which
10994 is checked above. */
10997 ix86_cannot_force_const_mem (rtx x)
10999 /* We can always put integral constants and vectors in memory. */
11000 switch (GET_CODE (x))
11010 return !legitimate_constant_p (x);
11014 /* Nonzero if the constant value X is a legitimate general operand
11015 when generating PIC code. It is given that flag_pic is on and
11016 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11019 legitimate_pic_operand_p (rtx x)
11023 switch (GET_CODE (x))
11026 inner = XEXP (x, 0);
11027 if (GET_CODE (inner) == PLUS
11028 && CONST_INT_P (XEXP (inner, 1)))
11029 inner = XEXP (inner, 0);
11031 /* Only some unspecs are valid as "constants". */
11032 if (GET_CODE (inner) == UNSPEC)
11033 switch (XINT (inner, 1))
11036 case UNSPEC_GOTOFF:
11037 case UNSPEC_PLTOFF:
11038 return TARGET_64BIT;
11040 x = XVECEXP (inner, 0, 0);
11041 return (GET_CODE (x) == SYMBOL_REF
11042 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11043 case UNSPEC_MACHOPIC_OFFSET:
11044 return legitimate_pic_address_disp_p (x);
11052 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
11067 /* In 64bit mode we can allow direct addresses of symbols and labels
11068 when they are not dynamic symbols. */
11071 rtx op0 = disp, op1;
11073 switch (GET_CODE (disp))
11079 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11081 op0 = XEXP (XEXP (disp, 0), 0);
11082 op1 = XEXP (XEXP (disp, 0), 1);
11083 if (!CONST_INT_P (op1)
11084 || INTVAL (op1) >= 16*1024*1024
11085 || INTVAL (op1) < -16*1024*1024)
11087 if (GET_CODE (op0) == LABEL_REF)
11089 if (GET_CODE (op0) != SYMBOL_REF)
11094 /* TLS references should always be enclosed in UNSPEC. */
11095 if (SYMBOL_REF_TLS_MODEL (op0))
11097 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11098 && ix86_cmodel != CM_LARGE_PIC)
11106 if (GET_CODE (disp) != CONST)
11108 disp = XEXP (disp, 0);
	  /* It is unsafe to allow PLUS expressions here, since they would
	     exceed the allowed displacement range of GOT tables.  We
	     should not need these anyway.  */
11114 if (GET_CODE (disp) != UNSPEC
11115 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11116 && XINT (disp, 1) != UNSPEC_GOTOFF
11117 && XINT (disp, 1) != UNSPEC_PLTOFF))
11120 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11121 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11127 if (GET_CODE (disp) == PLUS)
11129 if (!CONST_INT_P (XEXP (disp, 1)))
11131 disp = XEXP (disp, 0);
11135 if (TARGET_MACHO && darwin_local_data_pic (disp))
11138 if (GET_CODE (disp) != UNSPEC)
11141 switch (XINT (disp, 1))
11146 /* We need to check for both symbols and labels because VxWorks loads
11147 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11149 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11150 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11151 case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
11155 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11156 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11158 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11160 case UNSPEC_GOTTPOFF:
11161 case UNSPEC_GOTNTPOFF:
11162 case UNSPEC_INDNTPOFF:
11165 disp = XVECEXP (disp, 0, 0);
11166 return (GET_CODE (disp) == SYMBOL_REF
11167 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11168 case UNSPEC_NTPOFF:
11169 disp = XVECEXP (disp, 0, 0);
11170 return (GET_CODE (disp) == SYMBOL_REF
11171 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11172 case UNSPEC_DTPOFF:
11173 disp = XVECEXP (disp, 0, 0);
11174 return (GET_CODE (disp) == SYMBOL_REF
11175 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11181 /* Recognizes RTL expressions that are valid memory addresses for an
11182 instruction. The MODE argument is the machine mode for the MEM
11183 expression that wants to use this address.
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */
static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
11193 struct ix86_address parts;
11194 rtx base, index, disp;
11195 HOST_WIDE_INT scale;
11197 if (ix86_decompose_address (addr, &parts) <= 0)
11198 /* Decomposition failed. */
11202 index = parts.index;
11204 scale = parts.scale;
11206 /* Validate base register.
11208 Don't allow SUBREG's that span more than a word here. It can lead to spill
11209 failures when the base is one word out of a two word structure, which is
11210 represented internally as a DImode int. */
11218 else if (GET_CODE (base) == SUBREG
11219 && REG_P (SUBREG_REG (base))
11220 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11222 reg = SUBREG_REG (base);
11224 /* Base is not a register. */
11227 if (GET_MODE (base) != Pmode)
11228 /* Base is not in Pmode. */
11231 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11232 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11233 /* Base is not valid. */
11237 /* Validate index register.
11239 Don't allow SUBREG's that span more than a word here -- same as above. */
11247 else if (GET_CODE (index) == SUBREG
11248 && REG_P (SUBREG_REG (index))
11249 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11251 reg = SUBREG_REG (index);
11253 /* Index is not a register. */
11256 if (GET_MODE (index) != Pmode)
11257 /* Index is not in Pmode. */
11260 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11261 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11262 /* Index is not valid. */
11266 /* Validate scale factor. */
11270 /* Scale without index. */
11273 if (scale != 2 && scale != 4 && scale != 8)
11274 /* Scale is not a valid multiplier. */
11278 /* Validate displacement. */
11281 if (GET_CODE (disp) == CONST
11282 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11283 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11284 switch (XINT (XEXP (disp, 0), 1))
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP relative addressing
	     instead.  */
11290 case UNSPEC_GOTOFF:
11291 gcc_assert (flag_pic);
11293 goto is_legitimate_pic;
11295 /* 64bit address unspec. */
11298 case UNSPEC_GOTPCREL:
11299 gcc_assert (flag_pic);
11300 goto is_legitimate_pic;
11302 case UNSPEC_GOTTPOFF:
11303 case UNSPEC_GOTNTPOFF:
11304 case UNSPEC_INDNTPOFF:
11305 case UNSPEC_NTPOFF:
11306 case UNSPEC_DTPOFF:
11309 case UNSPEC_STACK_CHECK:
11310 gcc_assert (flag_split_stack);
11314 /* Invalid address unspec. */
11318 else if (SYMBOLIC_CONST (disp)
11322 && MACHOPIC_INDIRECT
11323 && !machopic_operand_p (disp)
11329 if (TARGET_64BIT && (index || base))
11331 /* foo@dtpoff(%rX) is ok. */
11332 if (GET_CODE (disp) != CONST
11333 || GET_CODE (XEXP (disp, 0)) != PLUS
11334 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11335 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11336 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11337 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11338 /* Non-constant pic memory reference. */
11341 else if (! legitimate_pic_address_disp_p (disp))
11342 /* Displacement is an invalid pic construct. */
      /* This code used to verify that a symbolic pic displacement
	 includes the pic_offset_table_rtx register.

	 While this is a good idea, unfortunately these constructs may
	 be created by "adds using lea" optimization for incorrect
	 code like:

	 int a;
	 int foo(int i)
	   {
	     return *(&a+i);
	   }

	 This code is nonsensical, but results in addressing
	 GOT table with pic_offset_table_rtx base.  We can't
	 just refuse it easily, since it gets matched by
	 "addsi3" pattern, that later gets split to lea in the
	 case the output register differs from the input.  While this
	 could be handled by a separate addsi pattern for this case
	 that never results in lea, disabling this test seems to be the
	 easier and correct fix for the crash.  */
11367 else if (GET_CODE (disp) != LABEL_REF
11368 && !CONST_INT_P (disp)
11369 && (GET_CODE (disp) != CONST
11370 || !legitimate_constant_p (disp))
11371 && (GET_CODE (disp) != SYMBOL_REF
11372 || !legitimate_constant_p (disp)))
11373 /* Displacement is not constant. */
11375 else if (TARGET_64BIT
11376 && !x86_64_immediate_operand (disp, VOIDmode))
11377 /* Displacement is out of range. */
11381 /* Everything looks valid. */
11385 /* Determine if a given RTX is a valid constant address. */
11388 constant_address_p (rtx x)
11390 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11393 /* Return a unique alias set for the GOT. */
11395 static alias_set_type
11396 ix86_GOT_alias_set (void)
11398 static alias_set_type set = -1;
11400 set = new_alias_set ();
11404 /* Return a legitimate reference for ORIG (an address) using the
11405 register REG. If REG is 0, a new pseudo is generated.
11407 There are two types of references that must be handled:
11409 1. Global data references must load the address from the GOT, via
11410 the PIC reg. An insn is emitted to do this load, and the reg is
11413 2. Static data references, constant pool addresses, and code labels
11414 compute the address as an offset from the GOT, whose base is in
11415 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11416 differentiate them from global data objects. The returned
11417 address is the PIC reg + an unspec constant.
11419 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11420 reg also appears in the address. */
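
/* For illustration (32-bit sketches of the two cases):

	movl	foo@GOT(%ebx), %eax	# case 1: load address from GOT
	leal	bar@GOTOFF(%ebx), %eax	# case 2: PIC reg + offset  */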
static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
11426 rtx new_rtx = orig;
11430 if (TARGET_MACHO && !TARGET_64BIT)
11433 reg = gen_reg_rtx (Pmode);
11434 /* Use the generic Mach-O PIC machinery. */
11435 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11439 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11441 else if (TARGET_64BIT
11442 && ix86_cmodel != CM_SMALL_PIC
11443 && gotoff_operand (addr, Pmode))
11446 /* This symbol may be referenced via a displacement from the PIC
11447 base address (@GOTOFF). */
11449 if (reload_in_progress)
11450 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11451 if (GET_CODE (addr) == CONST)
11452 addr = XEXP (addr, 0);
11453 if (GET_CODE (addr) == PLUS)
11455 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11457 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11460 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11461 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11463 tmpreg = gen_reg_rtx (Pmode);
11466 emit_move_insn (tmpreg, new_rtx);
11470 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11471 tmpreg, 1, OPTAB_DIRECT);
11474 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11476 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11478 /* This symbol may be referenced via a displacement from the PIC
11479 base address (@GOTOFF). */
11481 if (reload_in_progress)
11482 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11483 if (GET_CODE (addr) == CONST)
11484 addr = XEXP (addr, 0);
11485 if (GET_CODE (addr) == PLUS)
11487 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11489 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11492 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11493 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11494 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11498 emit_move_insn (reg, new_rtx);
11502 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11503 /* We can't use @GOTOFF for text labels on VxWorks;
11504 see gotoff_operand. */
11505 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11507 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11509 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11510 return legitimize_dllimport_symbol (addr, true);
11511 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11512 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11513 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11515 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11516 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11520 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11522 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11523 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11524 new_rtx = gen_const_mem (Pmode, new_rtx);
11525 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11528 reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
11532 emit_insn (gen_movsi (reg, new_rtx));
11537 /* This symbol must be referenced via a load from the
11538 Global Offset Table (@GOT). */
11540 if (reload_in_progress)
11541 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11542 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11543 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11545 new_rtx = force_reg (Pmode, new_rtx);
11546 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11547 new_rtx = gen_const_mem (Pmode, new_rtx);
11548 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11551 reg = gen_reg_rtx (Pmode);
11552 emit_move_insn (reg, new_rtx);
11558 if (CONST_INT_P (addr)
11559 && !x86_64_immediate_operand (addr, VOIDmode))
11563 emit_move_insn (reg, addr);
11567 new_rtx = force_reg (Pmode, addr);
11569 else if (GET_CODE (addr) == CONST)
11571 addr = XEXP (addr, 0);
11573 /* We must match stuff we generate before. Assume the only
11574 unspecs that can get here are ours. Not that we could do
11575 anything with them anyway.... */
11576 if (GET_CODE (addr) == UNSPEC
11577 || (GET_CODE (addr) == PLUS
11578 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11580 gcc_assert (GET_CODE (addr) == PLUS);
11582 if (GET_CODE (addr) == PLUS)
11584 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11586 /* Check first to see if this is a constant offset from a @GOTOFF
11587 symbol reference. */
11588 if (gotoff_operand (op0, Pmode)
11589 && CONST_INT_P (op1))
11593 if (reload_in_progress)
11594 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11595 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11597 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11598 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11599 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11603 emit_move_insn (reg, new_rtx);
11609 if (INTVAL (op1) < -16*1024*1024
11610 || INTVAL (op1) >= 16*1024*1024)
11612 if (!x86_64_immediate_operand (op1, Pmode))
11613 op1 = force_reg (Pmode, op1);
11614 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11620 base = legitimize_pic_address (XEXP (addr, 0), reg);
11621 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11622 base == reg ? NULL_RTX : reg);
11624 if (CONST_INT_P (new_rtx))
11625 new_rtx = plus_constant (base, INTVAL (new_rtx));
11628 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11630 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11631 new_rtx = XEXP (new_rtx, 1);
	      new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
	    }
	}
    }
  return new_rtx;
}
11641 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11644 get_thread_pointer (int to_reg)
11648 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11652 reg = gen_reg_rtx (Pmode);
11653 insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}
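
/* On x86 the thread pointer lives in a segment base register (%gs for
   32-bit GNU/Linux, %fs for 64-bit), so UNSPEC_TP ends up printed as a
   segment-relative reference such as %fs:0.  */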
11659 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11660 false if we expect this to be used for a memory address and true if
11661 we expect to load the address into a register. */
static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
11666 rtx dest, base, off, pic, tp;
11671 case TLS_MODEL_GLOBAL_DYNAMIC:
11672 dest = gen_reg_rtx (Pmode);
11673 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11675 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11677 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11680 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11681 insns = get_insns ();
11684 RTL_CONST_CALL_P (insns) = 1;
11685 emit_libcall_block (insns, dest, rax, x);
11687 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11688 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11690 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11692 if (TARGET_GNU2_TLS)
11694 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11696 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11700 case TLS_MODEL_LOCAL_DYNAMIC:
11701 base = gen_reg_rtx (Pmode);
11702 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11704 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11706 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11709 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11710 insns = get_insns ();
11713 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11714 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11715 RTL_CONST_CALL_P (insns) = 1;
11716 emit_libcall_block (insns, base, rax, note);
11718 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11719 emit_insn (gen_tls_local_dynamic_base_64 (base));
11721 emit_insn (gen_tls_local_dynamic_base_32 (base));
11723 if (TARGET_GNU2_TLS)
11725 rtx x = ix86_tls_module_base ();
11727 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11728 gen_rtx_MINUS (Pmode, x, tp));
11731 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11732 off = gen_rtx_CONST (Pmode, off);
11734 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11736 if (TARGET_GNU2_TLS)
11738 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11740 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11745 case TLS_MODEL_INITIAL_EXEC:
11749 type = UNSPEC_GOTNTPOFF;
11753 if (reload_in_progress)
11754 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11755 pic = pic_offset_table_rtx;
11756 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11758 else if (!TARGET_ANY_GNU_TLS)
11760 pic = gen_reg_rtx (Pmode);
11761 emit_insn (gen_set_got (pic));
11762 type = UNSPEC_GOTTPOFF;
11767 type = UNSPEC_INDNTPOFF;
11770 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11771 off = gen_rtx_CONST (Pmode, off);
11773 off = gen_rtx_PLUS (Pmode, pic, off);
11774 off = gen_const_mem (Pmode, off);
11775 set_mem_alias_set (off, ix86_GOT_alias_set ());
11777 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11779 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11780 off = force_reg (Pmode, off);
11781 return gen_rtx_PLUS (Pmode, base, off);
11785 base = get_thread_pointer (true);
11786 dest = gen_reg_rtx (Pmode);
11787 emit_insn (gen_subsi3 (dest, base, off));
11791 case TLS_MODEL_LOCAL_EXEC:
11792 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11793 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11794 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11795 off = gen_rtx_CONST (Pmode, off);
11797 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11799 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11800 return gen_rtx_PLUS (Pmode, base, off);
11804 base = get_thread_pointer (true);
11805 dest = gen_reg_rtx (Pmode);
11806 emit_insn (gen_subsi3 (dest, base, off));
11811 gcc_unreachable ();
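
  /* Typical 64-bit sequences the models above expand to (sketches
     only; exact forms depend on flags and linker relaxations):

       global dynamic:	leaq	x@tlsgd(%rip), %rdi
			call	__tls_get_addr@PLT
       initial exec:	movq	x@gottpoff(%rip), %rax
			movq	%fs:(%rax), %rax
       local exec:	movq	%fs:0, %rax
			leaq	x@tpoff(%rax), %rax  */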
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to decl.  */
11820 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11821 htab_t dllimport_map;
11824 get_dllimport_decl (tree decl)
11826 struct tree_map *h, in;
11829 const char *prefix;
11830 size_t namelen, prefixlen;
11835 if (!dllimport_map)
11836 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11838 in.hash = htab_hash_pointer (decl);
11839 in.base.from = decl;
11840 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11841 h = (struct tree_map *) *loc;
11845 *loc = h = ggc_alloc_tree_map ();
11847 h->base.from = decl;
11848 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11849 VAR_DECL, NULL, ptr_type_node);
11850 DECL_ARTIFICIAL (to) = 1;
11851 DECL_IGNORED_P (to) = 1;
11852 DECL_EXTERNAL (to) = 1;
11853 TREE_READONLY (to) = 1;
11855 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11856 name = targetm.strip_name_encoding (name);
11857 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11858 ? "*__imp_" : "*__imp__";
11859 namelen = strlen (name);
11860 prefixlen = strlen (prefix);
11861 imp_name = (char *) alloca (namelen + prefixlen + 1);
11862 memcpy (imp_name, prefix, prefixlen);
11863 memcpy (imp_name + prefixlen, name, namelen + 1);
11865 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11866 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11867 SET_SYMBOL_REF_DECL (rtl, to);
11868 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11870 rtl = gen_const_mem (Pmode, rtl);
11871 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11873 SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
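
/* For example, a dllimported "bar" is referenced through the pointer
   "__imp__bar" (or "__imp_bar" without a user label prefix), the import
   address table slot that the Windows loader fills in at load time.  */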
11879 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11880 true if we require the result be a register. */
static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
11888 gcc_assert (SYMBOL_REF_DECL (symbol));
11889 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11891 x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);

  return x;
}
11897 /* Try machine-dependent ways of modifying an illegitimate address
11898 to be legitimate. If we find one, return the new, valid address.
11899 This macro is used in only one place: `memory_address' in explow.c.
11901 OLDX is the address as it was before break_out_memory_refs was called.
11902 In some cases it is useful to look at this to decide what needs to be done.
11904 It is always safe for this macro to do nothing. It exists to recognize
11905 opportunities to optimize the output.
11907 For the 80386, we handle X+REG by loading X into a register R and
11908 using R+REG. R will go in a general reg and indexing will be used.
11909 However, if REG is a broken-out memory address or multiplication,
11910 nothing needs to be done because REG can certainly go in a general reg.
11912 When -fpic is used, special handling is needed for symbolic references.
11913 See comments by legitimize_pic_address in i386.c for details. */
static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
11922 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11924 return legitimize_tls_address (x, (enum tls_model) log, false);
11925 if (GET_CODE (x) == CONST
11926 && GET_CODE (XEXP (x, 0)) == PLUS
11927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11928 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11930 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11931 (enum tls_model) log, false);
11932 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11935 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11937 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11938 return legitimize_dllimport_symbol (x, true);
11939 if (GET_CODE (x) == CONST
11940 && GET_CODE (XEXP (x, 0)) == PLUS
11941 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11942 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11944 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11945 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11949 if (flag_pic && SYMBOLIC_CONST (x))
11950 return legitimize_pic_address (x, 0);
11952 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11953 if (GET_CODE (x) == ASHIFT
11954 && CONST_INT_P (XEXP (x, 1))
11955 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11958 log = INTVAL (XEXP (x, 1));
11959 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11960 GEN_INT (1 << log));
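  /* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
     matching the scaled-index form that ix86_decompose_address and the
     lea patterns expect.  */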
11963 if (GET_CODE (x) == PLUS)
11965 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11967 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11968 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11969 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11972 log = INTVAL (XEXP (XEXP (x, 0), 1));
11973 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11974 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11975 GEN_INT (1 << log));
11978 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11979 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11980 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11983 log = INTVAL (XEXP (XEXP (x, 1), 1));
11984 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11985 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11986 GEN_INT (1 << log));
11989 /* Put multiply first if it isn't already. */
11990 if (GET_CODE (XEXP (x, 1)) == MULT)
11992 rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}
11998 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11999 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12000 created by virtual register instantiation, register elimination, and
12001 similar optimizations. */
12002 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12005 x = gen_rtx_PLUS (Pmode,
12006 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12007 XEXP (XEXP (x, 1), 0)),
12008 XEXP (XEXP (x, 1), 1));
      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
12014 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12015 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12016 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12017 && CONSTANT_P (XEXP (x, 1)))
12020 rtx other = NULL_RTX;
12022 if (CONST_INT_P (XEXP (x, 1)))
12024 constant = XEXP (x, 1);
12025 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12027 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12029 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12030 other = XEXP (x, 1);
12038 x = gen_rtx_PLUS (Pmode,
12039 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12040 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12041 plus_constant (other, INTVAL (constant)));
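
      /* E.g. (plus (mult %eax 4) (plus %ebp -16)) becomes
	 (plus (plus (mult %eax 4) %ebp) -16), which is directly
	 encodable as -16(%ebp,%eax,4).  */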
  if (changed && ix86_legitimate_address_p (mode, x, false))
    return x;
12048 if (GET_CODE (XEXP (x, 0)) == MULT)
12051 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12054 if (GET_CODE (XEXP (x, 1)) == MULT)
12057 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12061 && REG_P (XEXP (x, 1))
12062 && REG_P (XEXP (x, 0)))
12065 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12068 x = legitimize_pic_address (x, 0);
      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;
12074 if (REG_P (XEXP (x, 0)))
12076 rtx temp = gen_reg_rtx (Pmode);
12077 rtx val = force_operand (XEXP (x, 1), temp);
12079 emit_move_insn (temp, val);
12081 XEXP (x, 1) = temp;
12085 else if (REG_P (XEXP (x, 1)))
12087 rtx temp = gen_reg_rtx (Pmode);
12088 rtx val = force_operand (XEXP (x, 0), temp);
12090 emit_move_insn (temp, val);
12092 XEXP (x, 0) = temp;
12100 /* Print an integer constant expression in assembler syntax. Addition
12101 and subtraction are the only arithmetic that may appear in these
12102 expressions. FILE is the stdio stream to write to, X is the rtx, and
12103 CODE is the operand print code from the output string. */
12106 output_pic_addr_const (FILE *file, rtx x, int code)
12110 switch (GET_CODE (x))
12113 gcc_assert (flag_pic);
12118 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12119 output_addr_const (file, x);
12122 const char *name = XSTR (x, 0);
12124 /* Mark the decl as referenced so that cgraph will
12125 output the function. */
12126 if (SYMBOL_REF_DECL (x))
12127 mark_decl_referenced (SYMBOL_REF_DECL (x));
12130 if (MACHOPIC_INDIRECT
12131 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12132 name = machopic_indirection_name (x, /*stub_p=*/true);
12134 assemble_name (file, name);
12136 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12137 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12138 fputs ("@PLT", file);
12145 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12146 assemble_name (asm_out_file, buf);
12150 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12154 /* This used to output parentheses around the expression,
12155 but that does not work on the 386 (either ATT or BSD assembler). */
12156 output_pic_addr_const (file, XEXP (x, 0), code);
12160 if (GET_MODE (x) == VOIDmode)
12162 /* We can use %d if the number is <32 bits and positive. */
12163 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12164 fprintf (file, "0x%lx%08lx",
12165 (unsigned long) CONST_DOUBLE_HIGH (x),
12166 (unsigned long) CONST_DOUBLE_LOW (x));
12168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12171 /* We can't handle floating point constants;
12172 TARGET_PRINT_OPERAND must handle them. */
12173 output_operand_lossage ("floating constant misused");
12177 /* Some assemblers need integer constants to appear first. */
12178 if (CONST_INT_P (XEXP (x, 0)))
12180 output_pic_addr_const (file, XEXP (x, 0), code);
12182 output_pic_addr_const (file, XEXP (x, 1), code);
12186 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12187 output_pic_addr_const (file, XEXP (x, 1), code);
12189 output_pic_addr_const (file, XEXP (x, 0), code);
12195 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12196 output_pic_addr_const (file, XEXP (x, 0), code);
12198 output_pic_addr_const (file, XEXP (x, 1), code);
12200 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12204 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12206 bool f = i386_asm_output_addr_const_extra (file, x);
12211 gcc_assert (XVECLEN (x, 0) == 1);
12212 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12213 switch (XINT (x, 1))
12216 fputs ("@GOT", file);
12218 case UNSPEC_GOTOFF:
12219 fputs ("@GOTOFF", file);
12221 case UNSPEC_PLTOFF:
12222 fputs ("@PLTOFF", file);
12224 case UNSPEC_GOTPCREL:
12225 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12226 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12228 case UNSPEC_GOTTPOFF:
12229 /* FIXME: This might be @TPOFF in Sun ld too. */
12230 fputs ("@gottpoff", file);
12233 fputs ("@tpoff", file);
12235 case UNSPEC_NTPOFF:
12237 fputs ("@tpoff", file);
12239 fputs ("@ntpoff", file);
12241 case UNSPEC_DTPOFF:
12242 fputs ("@dtpoff", file);
12244 case UNSPEC_GOTNTPOFF:
12246 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12247 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12249 fputs ("@gotntpoff", file);
12251 case UNSPEC_INDNTPOFF:
12252 fputs ("@indntpoff", file);
12255 case UNSPEC_MACHOPIC_OFFSET:
12257 machopic_output_function_base_name (file);
12261 output_operand_lossage ("invalid UNSPEC as operand");
12267 output_operand_lossage ("invalid expression as operand");
12271 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12272 We need to emit DTP-relative relocations. */
12274 static void ATTRIBUTE_UNUSED
12275 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12277 fputs (ASM_LONG, file);
12278 output_addr_const (file, x);
12279 fputs ("@dtpoff", file);
12285 fputs (", 0", file);
12288 gcc_unreachable ();
12292 /* Return true if X is a representation of the PIC register. This copes
12293 with calls from ix86_find_base_term, where the register might have
12294 been replaced by a cselib value. */
12297 ix86_pic_register_p (rtx x)
12299 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12300 return (pic_offset_table_rtx
12301 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12303 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12306 /* In the name of slightly smaller debug output, and to cater to
12307 general assembler lossage, recognize PIC+GOTOFF and turn it back
12308 into a direct symbol reference.
12310 On Darwin, this is necessary to avoid a crash, because Darwin
12311 has a different PIC label for each routine but the DWARF debugging
12312 information is not associated with any particular routine, so it's
12313 necessary to remove references to the PIC label from RTL stored by
12314 the DWARF output code. */
12317 ix86_delegitimize_address (rtx x)
12319 rtx orig_x = delegitimize_mem_from_attrs (x);
12320 /* addend is NULL or some rtx if x is something+GOTOFF where
12321 something doesn't include the PIC register. */
12322 rtx addend = NULL_RTX;
12323 /* reg_addend is NULL or a multiple of some register. */
12324 rtx reg_addend = NULL_RTX;
12325 /* const_addend is NULL or a const_int. */
12326 rtx const_addend = NULL_RTX;
12327 /* This is the result, or NULL. */
12328 rtx result = NULL_RTX;
12337 if (GET_CODE (x) != CONST
12338 || GET_CODE (XEXP (x, 0)) != UNSPEC
12339 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12340 || !MEM_P (orig_x))
12342 x = XVECEXP (XEXP (x, 0), 0, 0);
12343 if (GET_MODE (orig_x) != Pmode)
12344 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12348 if (GET_CODE (x) != PLUS
12349 || GET_CODE (XEXP (x, 1)) != CONST)
12352 if (ix86_pic_register_p (XEXP (x, 0)))
12353 /* %ebx + GOT/GOTOFF */
12355 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12357 /* %ebx + %reg * scale + GOT/GOTOFF */
12358 reg_addend = XEXP (x, 0);
12359 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12360 reg_addend = XEXP (reg_addend, 1);
12361 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12362 reg_addend = XEXP (reg_addend, 0);
12365 reg_addend = NULL_RTX;
12366 addend = XEXP (x, 0);
12370 addend = XEXP (x, 0);
12372 x = XEXP (XEXP (x, 1), 0);
12373 if (GET_CODE (x) == PLUS
12374 && CONST_INT_P (XEXP (x, 1)))
12376 const_addend = XEXP (x, 1);
12380 if (GET_CODE (x) == UNSPEC
12381 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12382 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12383 result = XVECEXP (x, 0, 0);
12385 if (TARGET_MACHO && darwin_local_data_pic (x)
12386 && !MEM_P (orig_x))
12387 result = XVECEXP (x, 0, 0);
12393 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12395 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12398 /* If the rest of the original X doesn't involve the PIC register, add
12399 the addend and subtract pic_offset_table_rtx.  This can happen e.g.
12401 leal (%ebx, %ecx, 4), %ecx
12403 movl foo@GOTOFF(%ecx), %edx
12404 in which case we return (%ecx - %ebx) + foo. */
12405 if (pic_offset_table_rtx)
12406 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12407 pic_offset_table_rtx),
12412 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12413 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12417 /* If X is a machine specific address (i.e. a symbol or label being
12418 referenced as a displacement from the GOT implemented using an
12419 UNSPEC), then return the base term. Otherwise return X. */
12422 ix86_find_base_term (rtx x)
12428 if (GET_CODE (x) != CONST)
12430 term = XEXP (x, 0);
12431 if (GET_CODE (term) == PLUS
12432 && (CONST_INT_P (XEXP (term, 1))
12433 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12434 term = XEXP (term, 0);
12435 if (GET_CODE (term) != UNSPEC
12436 || XINT (term, 1) != UNSPEC_GOTPCREL)
12439 return XVECEXP (term, 0, 0);
12442 return ix86_delegitimize_address (x);
12446 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12447 int fp, FILE *file)
12449 const char *suffix;
12451 if (mode == CCFPmode || mode == CCFPUmode)
12453 code = ix86_fp_compare_code_to_integer (code);
12457 code = reverse_condition (code);
12508 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12512 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12513 Those same assemblers have the same but opposite lossage on cmov. */
12514 if (mode == CCmode)
12515 suffix = fp ? "nbe" : "a";
12516 else if (mode == CCCmode)
12519 gcc_unreachable ();
12535 gcc_unreachable ();
12539 gcc_assert (mode == CCmode || mode == CCCmode);
12556 gcc_unreachable ();
12560 /* ??? As above. */
12561 gcc_assert (mode == CCmode || mode == CCCmode);
12562 suffix = fp ? "nb" : "ae";
12565 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12569 /* ??? As above. */
12570 if (mode == CCmode)
12572 else if (mode == CCCmode)
12573 suffix = fp ? "nb" : "ae";
12575 gcc_unreachable ();
12578 suffix = fp ? "u" : "p";
12581 suffix = fp ? "nu" : "np";
12584 gcc_unreachable ();
12586 fputs (suffix, file);
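/* Example (illustrative sketch, not from the original sources): for an
   unsigned greater-than (GTU) in CCmode the suffix is "a", or "nbe" in
   the fcmov case, so a setcc pattern prints "seta" while the
   corresponding fcmov prints "fcmovnbe" to sidestep the assembler
   lossage noted above.  */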
12589 /* Print the name of register X to FILE based on its machine mode and number.
12590 If CODE is 'w', pretend the mode is HImode.
12591 If CODE is 'b', pretend the mode is QImode.
12592 If CODE is 'k', pretend the mode is SImode.
12593 If CODE is 'q', pretend the mode is DImode.
12594 If CODE is 'x', pretend the mode is V4SFmode.
12595 If CODE is 't', pretend the mode is V8SFmode.
12596 If CODE is 'h', pretend the reg is the 'high' byte register.
12597 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
12598 If CODE is 'd', duplicate the operand for an AVX instruction.
12602 print_reg (rtx x, int code, FILE *file)
12605 bool duplicated = code == 'd' && TARGET_AVX;
12607 gcc_assert (x == pc_rtx
12608 || (REGNO (x) != ARG_POINTER_REGNUM
12609 && REGNO (x) != FRAME_POINTER_REGNUM
12610 && REGNO (x) != FLAGS_REG
12611 && REGNO (x) != FPSR_REG
12612 && REGNO (x) != FPCR_REG));
12614 if (ASSEMBLER_DIALECT == ASM_ATT)
12619 gcc_assert (TARGET_64BIT);
12620 fputs ("rip", file);
12624 if (code == 'w' || MMX_REG_P (x))
12626 else if (code == 'b')
12628 else if (code == 'k')
12630 else if (code == 'q')
12632 else if (code == 'y')
12634 else if (code == 'h')
12636 else if (code == 'x')
12638 else if (code == 't')
12641 code = GET_MODE_SIZE (GET_MODE (x));
12643 /* Irritatingly, AMD extended registers use a different naming convention
12644 from the normal registers.  */
12645 if (REX_INT_REG_P (x))
12647 gcc_assert (TARGET_64BIT);
12651 error ("extended registers have no high halves");
12654 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12657 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12660 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12663 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12666 error ("unsupported operand size for extended register");
12676 if (STACK_TOP_P (x))
12685 if (! ANY_FP_REG_P (x))
12686 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12691 reg = hi_reg_name[REGNO (x)];
12694 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12696 reg = qi_reg_name[REGNO (x)];
12699 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12701 reg = qi_high_reg_name[REGNO (x)];
12706 gcc_assert (!duplicated);
12708 fputs (hi_reg_name[REGNO (x)] + 1, file);
12713 gcc_unreachable ();
12719 if (ASSEMBLER_DIALECT == ASM_ATT)
12720 fprintf (file, ", %%%s", reg);
12722 fprintf (file, ", %s", reg);
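/* Example (illustrative): the first AMD extended register prints as "r8"
   in DImode and, via the explicit size codes handled above, as "r8d",
   "r8w" or "r8b" -- unlike the legacy registers, which get an "e" or "r"
   prefix instead.  */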
12726 /* Locate some local-dynamic symbol still in use by this function
12727 so that we can print its name in some tls_local_dynamic_base
12731 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12735 if (GET_CODE (x) == SYMBOL_REF
12736 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12738 cfun->machine->some_ld_name = XSTR (x, 0);
12745 static const char *
12746 get_some_local_dynamic_name (void)
12750 if (cfun->machine->some_ld_name)
12751 return cfun->machine->some_ld_name;
12753 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12754 if (NONDEBUG_INSN_P (insn)
12755 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12756 return cfun->machine->some_ld_name;
12761 /* Meaning of CODE:
12762 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12763 C -- print opcode suffix for set/cmov insn.
12764 c -- like C, but print reversed condition
12765 F,f -- likewise, but for floating-point.
12766 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12768 R -- print the prefix for register names.
12769 z -- print the opcode suffix for the size of the current operand.
12770 Z -- likewise, with special suffixes for x87 instructions.
12771 * -- print a star (in certain assembler syntax)
12772 A -- print an absolute memory reference.
12773 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12774 s -- print a shift double count, followed by the assembler's argument
12776 b -- print the QImode name of the register for the indicated operand.
12777 %b0 would print %al if operands[0] is reg 0.
12778 w -- likewise, print the HImode name of the register.
12779 k -- likewise, print the SImode name of the register.
12780 q -- likewise, print the DImode name of the register.
12781 x -- likewise, print the V4SFmode name of the register.
12782 t -- likewise, print the V8SFmode name of the register.
12783 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12784 y -- print "st(0)" instead of "st" as a register.
12785 d -- print duplicated register operand for an AVX instruction.
12786 D -- print condition for SSE cmp instruction.
12787 P -- if PIC, print an @PLT suffix.
12788 X -- don't print any sort of PIC '@' suffix for a symbol.
12789 & -- print some in-use local-dynamic symbol name.
12790 H -- print a memory address offset by 8; used for sse high-parts
12791 Y -- print condition for XOP pcom* instruction.
12792 + -- print a branch hint as 'cs' or 'ds' prefix
12793 ; -- print a semicolon (after prefixes, due to a bug in older gas).
12794 @ -- print a segment register of thread base pointer load
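Example (illustrative, not exhaustive): if operands[0] is the AX
register, %b0, %w0 and %k0 print %al, %ax and %eax respectively in
AT&T syntax, while %z0 prints just the opcode size suffix ("b", "w"
or "l").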
12798 ix86_print_operand (FILE *file, rtx x, int code)
12805 if (ASSEMBLER_DIALECT == ASM_ATT)
12811 const char *name = get_some_local_dynamic_name ();
12813 output_operand_lossage ("'%%&' used without any "
12814 "local dynamic TLS references");
12816 assemble_name (file, name);
12821 switch (ASSEMBLER_DIALECT)
12828 /* Intel syntax. For absolute addresses, registers should not
12829 be surrounded by braces. */
12833 ix86_print_operand (file, x, 0);
12840 gcc_unreachable ();
12843 ix86_print_operand (file, x, 0);
12848 if (ASSEMBLER_DIALECT == ASM_ATT)
12853 if (ASSEMBLER_DIALECT == ASM_ATT)
12858 if (ASSEMBLER_DIALECT == ASM_ATT)
12863 if (ASSEMBLER_DIALECT == ASM_ATT)
12868 if (ASSEMBLER_DIALECT == ASM_ATT)
12873 if (ASSEMBLER_DIALECT == ASM_ATT)
12878 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12880 /* Opcodes don't get size suffixes if using Intel opcodes. */
12881 if (ASSEMBLER_DIALECT == ASM_INTEL)
12884 switch (GET_MODE_SIZE (GET_MODE (x)))
12903 output_operand_lossage
12904 ("invalid operand size for operand code '%c'", code);
12909 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12911 (0, "non-integer operand used with operand code '%c'", code);
12915 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12916 if (ASSEMBLER_DIALECT == ASM_INTEL)
12919 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12921 switch (GET_MODE_SIZE (GET_MODE (x)))
12924 #ifdef HAVE_AS_IX86_FILDS
12934 #ifdef HAVE_AS_IX86_FILDQ
12937 fputs ("ll", file);
12945 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12947 /* 387 opcodes don't get size suffixes
12948 if the operands are registers. */
12949 if (STACK_REG_P (x))
12952 switch (GET_MODE_SIZE (GET_MODE (x)))
12973 output_operand_lossage
12974 ("invalid operand type used with operand code '%c'", code);
12978 output_operand_lossage
12979 ("invalid operand size for operand code '%c'", code);
12996 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12998 ix86_print_operand (file, x, 0);
12999 fputs (", ", file);
13004 /* Little bit of braindamage here.  The SSE compare instructions
13005 use completely different names for the comparisons than the
13006 fp conditional moves do.  */
13009 switch (GET_CODE (x))
13012 fputs ("eq", file);
13015 fputs ("eq_us", file);
13018 fputs ("lt", file);
13021 fputs ("nge", file);
13024 fputs ("le", file);
13027 fputs ("ngt", file);
13030 fputs ("unord", file);
13033 fputs ("neq", file);
13036 fputs ("neq_oq", file);
13039 fputs ("ge", file);
13042 fputs ("nlt", file);
13045 fputs ("gt", file);
13048 fputs ("nle", file);
13051 fputs ("ord", file);
13054 output_operand_lossage ("operand is not a condition code, "
13055 "invalid operand code 'D'");
13061 switch (GET_CODE (x))
13065 fputs ("eq", file);
13069 fputs ("lt", file);
13073 fputs ("le", file);
13076 fputs ("unord", file);
13080 fputs ("neq", file);
13084 fputs ("nlt", file);
13088 fputs ("nle", file);
13091 fputs ("ord", file);
13094 output_operand_lossage ("operand is not a condition code, "
13095 "invalid operand code 'D'");
13101 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13102 if (ASSEMBLER_DIALECT == ASM_ATT)
13104 switch (GET_MODE (x))
13106 case HImode: putc ('w', file); break;
13108 case SFmode: putc ('l', file); break;
13110 case DFmode: putc ('q', file); break;
13111 default: gcc_unreachable ();
13118 if (!COMPARISON_P (x))
13120 output_operand_lossage ("operand is neither a constant nor a "
13121 "condition code, invalid operand code "
13125 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13128 if (!COMPARISON_P (x))
13130 output_operand_lossage ("operand is neither a constant nor a "
13131 "condition code, invalid operand code "
13135 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13136 if (ASSEMBLER_DIALECT == ASM_ATT)
13139 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13142 /* Like above, but reverse condition */
13144 /* Check to see if argument to %c is really a constant
13145 and not a condition code which needs to be reversed. */
13146 if (!COMPARISON_P (x))
13148 output_operand_lossage ("operand is neither a constant nor a "
13149 "condition code, invalid operand "
13153 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13156 if (!COMPARISON_P (x))
13158 output_operand_lossage ("operand is neither a constant nor a "
13159 "condition code, invalid operand "
13163 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13164 if (ASSEMBLER_DIALECT == ASM_ATT)
13167 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13171 /* It doesn't actually matter what mode we use here, as we're
13172 only going to use this for printing. */
13173 x = adjust_address_nv (x, DImode, 8);
13181 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13184 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13187 int pred_val = INTVAL (XEXP (x, 0));
13189 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13190 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13192 int taken = pred_val > REG_BR_PROB_BASE / 2;
13193 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13195 /* Emit hints only where the default branch prediction
13196 heuristics would fail.  */
13197 if (taken != cputaken)
13199 /* We use 3e (DS) prefix for taken branches and
13200 2e (CS) prefix for not taken branches. */
13202 fputs ("ds ; ", file);
13204 fputs ("cs ; ", file);
13212 switch (GET_CODE (x))
13215 fputs ("neq", file);
13218 fputs ("eq", file);
13222 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13226 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13230 fputs ("le", file);
13234 fputs ("lt", file);
13237 fputs ("unord", file);
13240 fputs ("ord", file);
13243 fputs ("ueq", file);
13246 fputs ("nlt", file);
13249 fputs ("nle", file);
13252 fputs ("ule", file);
13255 fputs ("ult", file);
13258 fputs ("une", file);
13261 output_operand_lossage ("operand is not a condition code, "
13262 "invalid operand code 'Y'");
13268 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13274 if (ASSEMBLER_DIALECT == ASM_ATT)
13277 /* The kernel uses a different segment register for performance
13278 reasons; this way a system call does not have to trash the userspace
13279 segment register, which would be expensive.  */
13280 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13281 fputs ("fs", file);
13283 fputs ("gs", file);
13287 output_operand_lossage ("invalid operand code '%c'", code);
13292 print_reg (x, code, file);
13294 else if (MEM_P (x))
13296 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13297 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13298 && GET_MODE (x) != BLKmode)
13301 switch (GET_MODE_SIZE (GET_MODE (x)))
13303 case 1: size = "BYTE"; break;
13304 case 2: size = "WORD"; break;
13305 case 4: size = "DWORD"; break;
13306 case 8: size = "QWORD"; break;
13307 case 12: size = "TBYTE"; break;
13309 if (GET_MODE (x) == XFmode)
13314 case 32: size = "YMMWORD"; break;
13316 gcc_unreachable ();
13319 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13322 else if (code == 'w')
13324 else if (code == 'k')
13327 fputs (size, file);
13328 fputs (" PTR ", file);
13332 /* Avoid (%rip) for call operands. */
13333 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13334 && !CONST_INT_P (x))
13335 output_addr_const (file, x);
13336 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13337 output_operand_lossage ("invalid constraints for operand");
13339 output_address (x);
13342 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13347 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13348 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13350 if (ASSEMBLER_DIALECT == ASM_ATT)
13352 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13354 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13356 fprintf (file, "0x%08x", (unsigned int) l);
13359 /* These float cases don't actually occur as immediate operands. */
13360 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13364 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13365 fputs (dstr, file);
13368 else if (GET_CODE (x) == CONST_DOUBLE
13369 && GET_MODE (x) == XFmode)
13373 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13374 fputs (dstr, file);
13379 /* We have patterns that allow zero sets of memory, for instance.
13380 In 64-bit mode, we should probably support all 8-byte vectors,
13381 since we can in fact encode that into an immediate. */
13382 if (GET_CODE (x) == CONST_VECTOR)
13384 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13390 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13392 if (ASSEMBLER_DIALECT == ASM_ATT)
13395 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13396 || GET_CODE (x) == LABEL_REF)
13398 if (ASSEMBLER_DIALECT == ASM_ATT)
13401 fputs ("OFFSET FLAT:", file);
13404 if (CONST_INT_P (x))
13405 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13407 output_pic_addr_const (file, x, code);
13409 output_addr_const (file, x);
13414 ix86_print_operand_punct_valid_p (unsigned char code)
13416 return (code == '@' || code == '*' || code == '+'
13417 || code == '&' || code == ';');
13420 /* Print a memory operand whose address is ADDR. */
13423 ix86_print_operand_address (FILE *file, rtx addr)
13425 struct ix86_address parts;
13426 rtx base, index, disp;
13428 int ok = ix86_decompose_address (addr, &parts);
13433 index = parts.index;
13435 scale = parts.scale;
13443 if (ASSEMBLER_DIALECT == ASM_ATT)
13445 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13448 gcc_unreachable ();
13451 /* Use RIP-relative addressing, which is one byte shorter, in 64bit mode.  */
13452 if (TARGET_64BIT && !base && !index)
13456 if (GET_CODE (disp) == CONST
13457 && GET_CODE (XEXP (disp, 0)) == PLUS
13458 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13459 symbol = XEXP (XEXP (disp, 0), 0);
13461 if (GET_CODE (symbol) == LABEL_REF
13462 || (GET_CODE (symbol) == SYMBOL_REF
13463 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13466 if (!base && !index)
13468 /* A displacement-only address requires special attention.  */
13470 if (CONST_INT_P (disp))
13472 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13473 fputs ("ds:", file);
13474 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13477 output_pic_addr_const (file, disp, 0);
13479 output_addr_const (file, disp);
13483 if (ASSEMBLER_DIALECT == ASM_ATT)
13488 output_pic_addr_const (file, disp, 0);
13489 else if (GET_CODE (disp) == LABEL_REF)
13490 output_asm_label (disp);
13492 output_addr_const (file, disp);
13497 print_reg (base, 0, file);
13501 print_reg (index, 0, file);
13503 fprintf (file, ",%d", scale);
13509 rtx offset = NULL_RTX;
13513 /* Pull out the offset of a symbol; print any symbol itself. */
13514 if (GET_CODE (disp) == CONST
13515 && GET_CODE (XEXP (disp, 0)) == PLUS
13516 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13518 offset = XEXP (XEXP (disp, 0), 1);
13519 disp = gen_rtx_CONST (VOIDmode,
13520 XEXP (XEXP (disp, 0), 0));
13524 output_pic_addr_const (file, disp, 0);
13525 else if (GET_CODE (disp) == LABEL_REF)
13526 output_asm_label (disp);
13527 else if (CONST_INT_P (disp))
13530 output_addr_const (file, disp);
13536 print_reg (base, 0, file);
13539 if (INTVAL (offset) >= 0)
13541 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13545 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13552 print_reg (index, 0, file);
13554 fprintf (file, "*%d", scale);
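/* Example (illustrative): a full base+index*scale+disp operand prints as
   "-4(%ebx,%ecx,8)" in AT&T syntax and as "[ebx+ecx*8-4]" in Intel
   syntax, matching the two dialect branches above.  */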
13561 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13564 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13568 if (GET_CODE (x) != UNSPEC)
13571 op = XVECEXP (x, 0, 0);
13572 switch (XINT (x, 1))
13574 case UNSPEC_GOTTPOFF:
13575 output_addr_const (file, op);
13576 /* FIXME: This might be @TPOFF in Sun ld. */
13577 fputs ("@gottpoff", file);
13580 output_addr_const (file, op);
13581 fputs ("@tpoff", file);
13583 case UNSPEC_NTPOFF:
13584 output_addr_const (file, op);
13586 fputs ("@tpoff", file);
13588 fputs ("@ntpoff", file);
13590 case UNSPEC_DTPOFF:
13591 output_addr_const (file, op);
13592 fputs ("@dtpoff", file);
13594 case UNSPEC_GOTNTPOFF:
13595 output_addr_const (file, op);
13597 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13598 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13600 fputs ("@gotntpoff", file);
13602 case UNSPEC_INDNTPOFF:
13603 output_addr_const (file, op);
13604 fputs ("@indntpoff", file);
13607 case UNSPEC_MACHOPIC_OFFSET:
13608 output_addr_const (file, op);
13610 machopic_output_function_base_name (file);
13614 case UNSPEC_STACK_CHECK:
13618 gcc_assert (flag_split_stack);
13620 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13621 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13623 gcc_unreachable ();
13626 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13637 /* Split one or more double-mode RTL references into pairs of half-mode
13638 references. The RTL can be REG, offsettable MEM, integer constant, or
13639 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13640 split and "num" is its length. lo_half and hi_half are output arrays
13641 that parallel "operands". */
13644 split_double_mode (enum machine_mode mode, rtx operands[],
13645 int num, rtx lo_half[], rtx hi_half[])
13647 enum machine_mode half_mode;
13653 half_mode = DImode;
13656 half_mode = SImode;
13659 gcc_unreachable ();
13662 byte = GET_MODE_SIZE (half_mode);
13666 rtx op = operands[num];
13668 /* simplify_subreg refuses to split volatile memory addresses,
13669 but we still have to handle them.  */
13672 lo_half[num] = adjust_address (op, half_mode, 0);
13673 hi_half[num] = adjust_address (op, half_mode, byte);
13677 lo_half[num] = simplify_gen_subreg (half_mode, op,
13678 GET_MODE (op) == VOIDmode
13679 ? mode : GET_MODE (op), 0);
13680 hi_half[num] = simplify_gen_subreg (half_mode, op,
13681 GET_MODE (op) == VOIDmode
13682 ? mode : GET_MODE (op), byte);
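/* Example (illustrative): splitting a DImode operand on ia32 yields two
   SImode halves; for an offsettable memory operand the halves are the
   same address at offsets 0 and GET_MODE_SIZE (SImode) == 4, via
   adjust_address above.  */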
13687 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13688 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13689 is the expression of the binary operation. The output may either be
13690 emitted here, or returned to the caller, like all output_* functions.
13692 There is no guarantee that the operands have the same mode, as they
13693 might be within FLOAT or FLOAT_EXTEND expressions.  */
13695 #ifndef SYSV386_COMPAT
13696 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13697 wants to fix the assemblers because that causes incompatibility
13698 with gcc. No-one wants to fix gcc because that causes
13699 incompatibility with assemblers... You can use the option of
13700 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13701 #define SYSV386_COMPAT 1
13705 output_387_binary_op (rtx insn, rtx *operands)
13707 static char buf[40];
13710 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13712 #ifdef ENABLE_CHECKING
13713 /* Even if we do not want to check the inputs, this documents the input
13714 constraints, which helps in understanding the following code.  */
13715 if (STACK_REG_P (operands[0])
13716 && ((REG_P (operands[1])
13717 && REGNO (operands[0]) == REGNO (operands[1])
13718 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13719 || (REG_P (operands[2])
13720 && REGNO (operands[0]) == REGNO (operands[2])
13721 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13722 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13725 gcc_assert (is_sse);
13728 switch (GET_CODE (operands[3]))
13731 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13732 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13740 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13741 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13749 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13750 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13758 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13759 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13767 gcc_unreachable ();
13774 strcpy (buf, ssep);
13775 if (GET_MODE (operands[0]) == SFmode)
13776 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13778 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13782 strcpy (buf, ssep + 1);
13783 if (GET_MODE (operands[0]) == SFmode)
13784 strcat (buf, "ss\t{%2, %0|%0, %2}");
13786 strcat (buf, "sd\t{%2, %0|%0, %2}");
13792 switch (GET_CODE (operands[3]))
13796 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13798 rtx temp = operands[2];
13799 operands[2] = operands[1];
13800 operands[1] = temp;
13803 /* Now we know operands[0] == operands[1].  */
13805 if (MEM_P (operands[2]))
13811 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13813 if (STACK_TOP_P (operands[0]))
13814 /* How is it that we are storing to a dead operand[2]?
13815 Well, presumably operands[1] is dead too. We can't
13816 store the result to st(0) as st(0) gets popped on this
13817 instruction. Instead store to operands[2] (which I
13818 think has to be st(1)). st(1) will be popped later.
13819 gcc <= 2.8.1 didn't have this check and generated
13820 assembly code that the Unixware assembler rejected. */
13821 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13823 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13827 if (STACK_TOP_P (operands[0]))
13828 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13830 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13835 if (MEM_P (operands[1]))
13841 if (MEM_P (operands[2]))
13847 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13850 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13851 derived assemblers, confusingly reverse the direction of
13852 the operation for fsub{r} and fdiv{r} when the
13853 destination register is not st(0). The Intel assembler
13854 doesn't have this brain damage. Read !SYSV386_COMPAT to
13855 figure out what the hardware really does. */
13856 if (STACK_TOP_P (operands[0]))
13857 p = "{p\t%0, %2|rp\t%2, %0}";
13859 p = "{rp\t%2, %0|p\t%0, %2}";
13861 if (STACK_TOP_P (operands[0]))
13862 /* As above for fmul/fadd, we can't store to st(0). */
13863 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13865 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13870 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13873 if (STACK_TOP_P (operands[0]))
13874 p = "{rp\t%0, %1|p\t%1, %0}";
13876 p = "{p\t%1, %0|rp\t%0, %1}";
13878 if (STACK_TOP_P (operands[0]))
13879 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13881 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13886 if (STACK_TOP_P (operands[0]))
13888 if (STACK_TOP_P (operands[1]))
13889 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13891 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13894 else if (STACK_TOP_P (operands[1]))
13897 p = "{\t%1, %0|r\t%0, %1}";
13899 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13905 p = "{r\t%2, %0|\t%0, %2}";
13907 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13913 gcc_unreachable ();
13920 /* Return the mode needed for an entity in the optimize_mode_switching pass.  */
13923 ix86_mode_needed (int entity, rtx insn)
13925 enum attr_i387_cw mode;
13927 /* The mode UNINITIALIZED is used to store the control word after a
13928 function call or ASM pattern.  The mode ANY specifies that the function
13929 has no requirements on the control word and makes no changes to the
13930 bits we are interested in.  */
13933 || (NONJUMP_INSN_P (insn)
13934 && (asm_noperands (PATTERN (insn)) >= 0
13935 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13936 return I387_CW_UNINITIALIZED;
13938 if (recog_memoized (insn) < 0)
13939 return I387_CW_ANY;
13941 mode = get_attr_i387_cw (insn);
13946 if (mode == I387_CW_TRUNC)
13951 if (mode == I387_CW_FLOOR)
13956 if (mode == I387_CW_CEIL)
13961 if (mode == I387_CW_MASK_PM)
13966 gcc_unreachable ();
13969 return I387_CW_ANY;
13972 /* Output code to initialize control word copies used by trunc?f?i and
13973 rounding patterns.  CURRENT_MODE is set to the current control word,
13974 while NEW_MODE is set to the new control word.  */
13977 emit_i387_cw_initialization (int mode)
13979 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13982 enum ix86_stack_slot slot;
13984 rtx reg = gen_reg_rtx (HImode);
13986 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13987 emit_move_insn (reg, copy_rtx (stored_mode));
13989 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13990 || optimize_function_for_size_p (cfun))
13994 case I387_CW_TRUNC:
13995 /* round toward zero (truncate) */
13996 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13997 slot = SLOT_CW_TRUNC;
14000 case I387_CW_FLOOR:
14001 /* round down toward -oo */
14002 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14003 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14004 slot = SLOT_CW_FLOOR;
14008 /* round up toward +oo */
14009 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14010 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14011 slot = SLOT_CW_CEIL;
14014 case I387_CW_MASK_PM:
14015 /* mask precision exception for nearbyint() */
14016 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14017 slot = SLOT_CW_MASK_PM;
14021 gcc_unreachable ();
14028 case I387_CW_TRUNC:
14029 /* round toward zero (truncate) */
14030 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14031 slot = SLOT_CW_TRUNC;
14034 case I387_CW_FLOOR:
14035 /* round down toward -oo */
14036 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14037 slot = SLOT_CW_FLOOR;
14041 /* round up toward +oo */
14042 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14043 slot = SLOT_CW_CEIL;
14046 case I387_CW_MASK_PM:
14047 /* mask precision exception for nearbyint() */
14048 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14049 slot = SLOT_CW_MASK_PM;
14053 gcc_unreachable ();
14057 gcc_assert (slot < MAX_386_STACK_LOCALS);
14059 new_mode = assign_386_stack_local (HImode, slot);
14060 emit_move_insn (new_mode, reg);
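/* For reference (standard x87 control word layout, stated here as an
   aside, not taken from the original sources): bits 10-11 select the
   rounding mode -- 00 nearest, 01 down, 10 up, 11 toward zero -- which
   is why the code above ORs in 0x0400 (floor), 0x0800 (ceil) or 0x0c00
   (truncate); bit 5 (0x0020) masks the precision exception for
   nearbyint.  */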
14063 /* Output code for INSN to convert a float to a signed int. OPERANDS
14064 are the insn operands. The output may be [HSD]Imode and the input
14065 operand may be [SDX]Fmode. */
14068 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14070 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14071 int dimode_p = GET_MODE (operands[0]) == DImode;
14072 int round_mode = get_attr_i387_cw (insn);
14074 /* Jump through a hoop or two for DImode, since the hardware has no
14075 non-popping instruction. We used to do this a different way, but
14076 that was somewhat fragile and broke with post-reload splitters. */
14077 if ((dimode_p || fisttp) && !stack_top_dies)
14078 output_asm_insn ("fld\t%y1", operands);
14080 gcc_assert (STACK_TOP_P (operands[1]));
14081 gcc_assert (MEM_P (operands[0]));
14082 gcc_assert (GET_MODE (operands[1]) != TFmode);
14085 output_asm_insn ("fisttp%Z0\t%0", operands);
14088 if (round_mode != I387_CW_ANY)
14089 output_asm_insn ("fldcw\t%3", operands);
14090 if (stack_top_dies || dimode_p)
14091 output_asm_insn ("fistp%Z0\t%0", operands);
14093 output_asm_insn ("fist%Z0\t%0", operands);
14094 if (round_mode != I387_CW_ANY)
14095 output_asm_insn ("fldcw\t%2", operands);
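/* Illustrative emitted sequence (an exposition sketch, assuming an
   SImode result and a rounding mode other than I387_CW_ANY):
       fldcw   %3      ; load the truncating control word
       fistpl  %0      ; store the integer and pop
       fldcw   %2      ; restore the original control word  */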
14101 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14102 have the values zero or one, indicates the ffreep insn's operand
14103 from the OPERANDS array. */
14105 static const char *
14106 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14108 if (TARGET_USE_FFREEP)
14109 #ifdef HAVE_AS_IX86_FFREEP
14110 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14113 static char retval[32];
14114 int regno = REGNO (operands[opno]);
14116 gcc_assert (FP_REGNO_P (regno));
14118 regno -= FIRST_STACK_REG;
14120 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14125 return opno ? "fstp\t%y1" : "fstp\t%y0";
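/* Example (illustrative): for st(1) the snprintf above emits
   ASM_SHORT "0xc1df", i.e. the two opcode bytes 0xdf 0xc1 of
   "ffreep %st(1)" encoded as a little-endian short, for assemblers that
   do not know the mnemonic.  */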
14129 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14130 should be used. UNORDERED_P is true when fucom should be used. */
14133 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14135 int stack_top_dies;
14136 rtx cmp_op0, cmp_op1;
14137 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14141 cmp_op0 = operands[0];
14142 cmp_op1 = operands[1];
14146 cmp_op0 = operands[1];
14147 cmp_op1 = operands[2];
14152 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14153 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14154 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14155 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14157 if (GET_MODE (operands[0]) == SFmode)
14159 return &ucomiss[TARGET_AVX ? 0 : 1];
14161 return &comiss[TARGET_AVX ? 0 : 1];
14164 return &ucomisd[TARGET_AVX ? 0 : 1];
14166 return &comisd[TARGET_AVX ? 0 : 1];
14169 gcc_assert (STACK_TOP_P (cmp_op0));
14171 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14173 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14175 if (stack_top_dies)
14177 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14178 return output_387_ffreep (operands, 1);
14181 return "ftst\n\tfnstsw\t%0";
14184 if (STACK_REG_P (cmp_op1)
14186 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14187 && REGNO (cmp_op1) != FIRST_STACK_REG)
14189 /* If the top of the 387 stack dies, and the other operand is
14190 also a stack register that dies, then this must be an
14191 `fcompp' float compare.  */
14195 /* There is no double popping fcomi variant.  Fortunately,
14196 eflags is immune to the fstp's cc clobbering.  */
14198 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14200 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14201 return output_387_ffreep (operands, 0);
14206 return "fucompp\n\tfnstsw\t%0";
14208 return "fcompp\n\tfnstsw\t%0";
14213 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14215 static const char * const alt[16] =
14217 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14218 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14219 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14220 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14222 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14223 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14227 "fcomi\t{%y1, %0|%0, %y1}",
14228 "fcomip\t{%y1, %0|%0, %y1}",
14229 "fucomi\t{%y1, %0|%0, %y1}",
14230 "fucomip\t{%y1, %0|%0, %y1}",
14241 mask = eflags_p << 3;
14242 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14243 mask |= unordered_p << 1;
14244 mask |= stack_top_dies;
14246 gcc_assert (mask < 16);
14255 ix86_output_addr_vec_elt (FILE *file, int value)
14257 const char *directive = ASM_LONG;
14261 directive = ASM_QUAD;
14263 gcc_assert (!TARGET_64BIT);
14266 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14270 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14272 const char *directive = ASM_LONG;
14275 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14276 directive = ASM_QUAD;
14278 gcc_assert (!TARGET_64BIT);
14280 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14281 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14282 fprintf (file, "%s%s%d-%s%d\n",
14283 directive, LPREFIX, value, LPREFIX, rel);
14284 else if (HAVE_AS_GOTOFF_IN_DATA)
14285 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14287 else if (TARGET_MACHO)
14289 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14290 machopic_output_function_base_name (file);
14295 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14296 GOT_SYMBOL_NAME, LPREFIX, value);
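/* Example (illustrative, assuming LPREFIX is ".L"): the 64-bit branch
   above prints ".long .L3-.L1" for value 3 and rel 1, while the
   HAVE_AS_GOTOFF_IN_DATA branch prints ".long .L3@GOTOFF".  */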
14299 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14303 ix86_expand_clear (rtx dest)
14307 /* We play register width games, which are only valid after reload. */
14308 gcc_assert (reload_completed);
14310 /* Avoid HImode and its attendant prefix byte. */
14311 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14312 dest = gen_rtx_REG (SImode, REGNO (dest));
14313 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14315 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14316 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14318 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14319 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
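/* Example (illustrative): clearing %eax this way emits the 2-byte
   "xorl %eax, %eax" (which clobbers the flags, hence the CLOBBER above)
   instead of the 5-byte "movl $0, %eax"; the mov form is kept only when
   TARGET_USE_MOV0 is set and we optimize for size.  */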
14325 /* X is an unchanging MEM. If it is a constant pool reference, return
14326 the constant pool rtx, else NULL. */
14329 maybe_get_pool_constant (rtx x)
14331 x = ix86_delegitimize_address (XEXP (x, 0));
14333 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14334 return get_pool_constant (x);
14340 ix86_expand_move (enum machine_mode mode, rtx operands[])
14343 enum tls_model model;
14348 if (GET_CODE (op1) == SYMBOL_REF)
14350 model = SYMBOL_REF_TLS_MODEL (op1);
14353 op1 = legitimize_tls_address (op1, model, true);
14354 op1 = force_operand (op1, op0);
14358 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14359 && SYMBOL_REF_DLLIMPORT_P (op1))
14360 op1 = legitimize_dllimport_symbol (op1, false);
14362 else if (GET_CODE (op1) == CONST
14363 && GET_CODE (XEXP (op1, 0)) == PLUS
14364 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14366 rtx addend = XEXP (XEXP (op1, 0), 1);
14367 rtx symbol = XEXP (XEXP (op1, 0), 0);
14370 model = SYMBOL_REF_TLS_MODEL (symbol);
14372 tmp = legitimize_tls_address (symbol, model, true);
14373 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14374 && SYMBOL_REF_DLLIMPORT_P (symbol))
14375 tmp = legitimize_dllimport_symbol (symbol, true);
14379 tmp = force_operand (tmp, NULL);
14380 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14381 op0, 1, OPTAB_DIRECT);
14387 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14389 if (TARGET_MACHO && !TARGET_64BIT)
14394 rtx temp = ((reload_in_progress
14395 || ((op0 && REG_P (op0))
14397 ? op0 : gen_reg_rtx (Pmode));
14398 op1 = machopic_indirect_data_reference (op1, temp);
14399 op1 = machopic_legitimize_pic_address (op1, mode,
14400 temp == op1 ? 0 : temp);
14402 else if (MACHOPIC_INDIRECT)
14403 op1 = machopic_indirect_data_reference (op1, 0);
14411 op1 = force_reg (Pmode, op1);
14412 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14414 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14415 op1 = legitimize_pic_address (op1, reg);
14424 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14425 || !push_operand (op0, mode))
14427 op1 = force_reg (mode, op1);
14429 if (push_operand (op0, mode)
14430 && ! general_no_elim_operand (op1, mode))
14431 op1 = copy_to_mode_reg (mode, op1);
14433 /* Force large constants in 64bit compilation into a register
14434 to get them CSEd.  */
14435 if (can_create_pseudo_p ()
14436 && (mode == DImode) && TARGET_64BIT
14437 && immediate_operand (op1, mode)
14438 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14439 && !register_operand (op0, mode)
14441 op1 = copy_to_mode_reg (mode, op1);
14443 if (can_create_pseudo_p ()
14444 && FLOAT_MODE_P (mode)
14445 && GET_CODE (op1) == CONST_DOUBLE)
14447 /* If we are loading a floating point constant to a register,
14448 force the value to memory now, since we'll get better code
14449 out of the back end.  */
14451 op1 = validize_mem (force_const_mem (mode, op1));
14452 if (!register_operand (op0, mode))
14454 rtx temp = gen_reg_rtx (mode);
14455 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14456 emit_move_insn (op0, temp);
14462 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14466 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14468 rtx op0 = operands[0], op1 = operands[1];
14469 unsigned int align = GET_MODE_ALIGNMENT (mode);
14471 /* Force constants other than zero into memory.  We do not know how
14472 the instructions used to build constants modify the upper 64 bits
14473 of the register; once we have that information, we may be able
14474 to handle some of them more efficiently.  */
14475 if (can_create_pseudo_p ()
14476 && register_operand (op0, mode)
14477 && (CONSTANT_P (op1)
14478 || (GET_CODE (op1) == SUBREG
14479 && CONSTANT_P (SUBREG_REG (op1))))
14480 && !standard_sse_constant_p (op1))
14481 op1 = validize_mem (force_const_mem (mode, op1));
14483 /* We need to check memory alignment for SSE mode since attributes
14484 can make operands unaligned.  */
14485 if (can_create_pseudo_p ()
14486 && SSE_REG_MODE_P (mode)
14487 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14488 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14492 /* ix86_expand_vector_move_misalign() does not like constants ... */
14493 if (CONSTANT_P (op1)
14494 || (GET_CODE (op1) == SUBREG
14495 && CONSTANT_P (SUBREG_REG (op1))))
14496 op1 = validize_mem (force_const_mem (mode, op1));
14498 /* ... nor both arguments in memory. */
14499 if (!register_operand (op0, mode)
14500 && !register_operand (op1, mode))
14501 op1 = force_reg (mode, op1);
14503 tmp[0] = op0; tmp[1] = op1;
14504 ix86_expand_vector_move_misalign (mode, tmp);
14508 /* Make operand1 a register if it isn't already. */
14509 if (can_create_pseudo_p ()
14510 && !register_operand (op0, mode)
14511 && !register_operand (op1, mode))
14513 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14517 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14520 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14521 straight to ix86_expand_vector_move. */
14522 /* Code generation for scalar reg-reg moves of single and double precision data:
14523 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
14527 if (x86_sse_partial_reg_dependency == true)
14532 Code generation for scalar loads of double precision data:
14533 if (x86_sse_split_regs == true)
14534 movlpd mem, reg (gas syntax)
14538 Code generation for unaligned packed loads of single precision data
14539 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14540 if (x86_sse_unaligned_move_optimal)
14543 if (x86_sse_partial_reg_dependency == true)
14555 Code generation for unaligned packed loads of double precision data
14556 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14557 if (x86_sse_unaligned_move_optimal)
14560 if (x86_sse_split_regs == true)
14573 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14582 switch (GET_MODE_CLASS (mode))
14584 case MODE_VECTOR_INT:
14586 switch (GET_MODE_SIZE (mode))
14589 /* If we're optimizing for size, movups is the smallest. */
14590 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14592 op0 = gen_lowpart (V4SFmode, op0);
14593 op1 = gen_lowpart (V4SFmode, op1);
14594 emit_insn (gen_avx_movups (op0, op1));
14597 op0 = gen_lowpart (V16QImode, op0);
14598 op1 = gen_lowpart (V16QImode, op1);
14599 emit_insn (gen_avx_movdqu (op0, op1));
14602 op0 = gen_lowpart (V32QImode, op0);
14603 op1 = gen_lowpart (V32QImode, op1);
14604 emit_insn (gen_avx_movdqu256 (op0, op1));
14607 gcc_unreachable ();
14610 case MODE_VECTOR_FLOAT:
14611 op0 = gen_lowpart (mode, op0);
14612 op1 = gen_lowpart (mode, op1);
14617 emit_insn (gen_avx_movups (op0, op1));
14620 emit_insn (gen_avx_movups256 (op0, op1));
14623 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14625 op0 = gen_lowpart (V4SFmode, op0);
14626 op1 = gen_lowpart (V4SFmode, op1);
14627 emit_insn (gen_avx_movups (op0, op1));
14630 emit_insn (gen_avx_movupd (op0, op1));
14633 emit_insn (gen_avx_movupd256 (op0, op1));
14636 gcc_unreachable ();
14641 gcc_unreachable ();
14649 /* If we're optimizing for size, movups is the smallest. */
14650 if (optimize_insn_for_size_p ()
14651 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14653 op0 = gen_lowpart (V4SFmode, op0);
14654 op1 = gen_lowpart (V4SFmode, op1);
14655 emit_insn (gen_sse_movups (op0, op1));
14659 /* ??? If we have typed data, then it would appear that using
14660 movdqu is the only way to get unaligned data loaded with
14662 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14664 op0 = gen_lowpart (V16QImode, op0);
14665 op1 = gen_lowpart (V16QImode, op1);
14666 emit_insn (gen_sse2_movdqu (op0, op1));
14670 if (TARGET_SSE2 && mode == V2DFmode)
14674 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14676 op0 = gen_lowpart (V2DFmode, op0);
14677 op1 = gen_lowpart (V2DFmode, op1);
14678 emit_insn (gen_sse2_movupd (op0, op1));
14682 /* When SSE registers are split into halves, we can avoid
14683 writing to the top half twice. */
14684 if (TARGET_SSE_SPLIT_REGS)
14686 emit_clobber (op0);
14691 /* ??? Not sure about the best option for the Intel chips.
14692 The following would seem to satisfy; the register is
14693 entirely cleared, breaking the dependency chain. We
14694 then store to the upper half, with a dependency depth
14695 of one. A rumor has it that Intel recommends two movsd
14696 followed by an unpacklpd, but this is unconfirmed. And
14697 given that the dependency depth of the unpacklpd would
14698 still be one, I'm not sure why this would be better. */
14699 zero = CONST0_RTX (V2DFmode);
14702 m = adjust_address (op1, DFmode, 0);
14703 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14704 m = adjust_address (op1, DFmode, 8);
14705 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14709 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14711 op0 = gen_lowpart (V4SFmode, op0);
14712 op1 = gen_lowpart (V4SFmode, op1);
14713 emit_insn (gen_sse_movups (op0, op1));
14717 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14718 emit_move_insn (op0, CONST0_RTX (mode));
14720 emit_clobber (op0);
14722 if (mode != V4SFmode)
14723 op0 = gen_lowpart (V4SFmode, op0);
14724 m = adjust_address (op1, V2SFmode, 0);
14725 emit_insn (gen_sse_loadlps (op0, op0, m));
14726 m = adjust_address (op1, V2SFmode, 8);
14727 emit_insn (gen_sse_loadhps (op0, op0, m));
14730 else if (MEM_P (op0))
14732 /* If we're optimizing for size, movups is the smallest. */
14733 if (optimize_insn_for_size_p ()
14734 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14736 op0 = gen_lowpart (V4SFmode, op0);
14737 op1 = gen_lowpart (V4SFmode, op1);
14738 emit_insn (gen_sse_movups (op0, op1));
14742 /* ??? Similar to above, only less clear because of "typeless
14743 stores".  */
14744 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14745 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14747 op0 = gen_lowpart (V16QImode, op0);
14748 op1 = gen_lowpart (V16QImode, op1);
14749 emit_insn (gen_sse2_movdqu (op0, op1));
14753 if (TARGET_SSE2 && mode == V2DFmode)
14755 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14757 op0 = gen_lowpart (V2DFmode, op0);
14758 op1 = gen_lowpart (V2DFmode, op1);
14759 emit_insn (gen_sse2_movupd (op0, op1));
14763 m = adjust_address (op0, DFmode, 0);
14764 emit_insn (gen_sse2_storelpd (m, op1));
14765 m = adjust_address (op0, DFmode, 8);
14766 emit_insn (gen_sse2_storehpd (m, op1));
14771 if (mode != V4SFmode)
14772 op1 = gen_lowpart (V4SFmode, op1);
14774 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14776 op0 = gen_lowpart (V4SFmode, op0);
14777 emit_insn (gen_sse_movups (op0, op1));
14781 m = adjust_address (op0, V2SFmode, 0);
14782 emit_insn (gen_sse_storelps (m, op1));
14783 m = adjust_address (op0, V2SFmode, 8);
14784 emit_insn (gen_sse_storehps (m, op1));
14789 gcc_unreachable ();
14792 /* Expand a push in MODE. This is some mode for which we do not support
14793 proper push instructions, at least from the registers that we expect
14794 the value to live in. */
14797 ix86_expand_push (enum machine_mode mode, rtx x)
14801 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14802 GEN_INT (-GET_MODE_SIZE (mode)),
14803 stack_pointer_rtx, 1, OPTAB_DIRECT);
14804 if (tmp != stack_pointer_rtx)
14805 emit_move_insn (stack_pointer_rtx, tmp);
14807 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14809 /* When we push an operand onto the stack, it has to be aligned at least
14810 at the function argument boundary.  However, since we don't have
14811 the argument type, we can't determine the actual argument
14813 emit_move_insn (tmp, x);
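/* Example (illustrative): pushing a TFmode value on x86_64 expands to
   "sub $16, %rsp" followed by a TFmode store to (%rsp), since there is
   no push instruction for that mode from SSE registers.  */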
14816 /* Helper function of ix86_fixup_binary_operands to canonicalize
14817 operand order. Returns true if the operands should be swapped. */
14820 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14823 rtx dst = operands[0];
14824 rtx src1 = operands[1];
14825 rtx src2 = operands[2];
14827 /* If the operation is not commutative, we can't do anything. */
14828 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14831 /* Highest priority is that src1 should match dst. */
14832 if (rtx_equal_p (dst, src1))
14834 if (rtx_equal_p (dst, src2))
14837 /* Next highest priority is that immediate constants come second. */
14838 if (immediate_operand (src2, mode))
14840 if (immediate_operand (src1, mode))
14843 /* Lowest priority is that memory references should come second. */
14853 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14854 destination to use for the operation. If different from the true
14855 destination in operands[0], a copy operation will be required. */
14858 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14861 rtx dst = operands[0];
14862 rtx src1 = operands[1];
14863 rtx src2 = operands[2];
14865 /* Canonicalize operand order. */
14866 if (ix86_swap_binary_operands_p (code, mode, operands))
14870 /* It is invalid to swap operands of different modes. */
14871 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14878 /* Both source operands cannot be in memory. */
14879 if (MEM_P (src1) && MEM_P (src2))
14881 /* Optimization: Only read from memory once. */
14882 if (rtx_equal_p (src1, src2))
14884 src2 = force_reg (mode, src2);
14888 src2 = force_reg (mode, src2);
14891 /* If the destination is memory, and we do not have matching source
14892 operands, do things in registers. */
14893 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14894 dst = gen_reg_rtx (mode);
14896 /* Source 1 cannot be a constant. */
14897 if (CONSTANT_P (src1))
14898 src1 = force_reg (mode, src1);
14900 /* Source 1 cannot be a non-matching memory. */
14901 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14902 src1 = force_reg (mode, src1);
14904 operands[1] = src1;
14905 operands[2] = src2;
14909 /* Similarly, but assume that the destination has already been
14910 set up properly. */
14913 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14914 enum machine_mode mode, rtx operands[])
14916 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14917 gcc_assert (dst == operands[0]);
14920 /* Attempt to expand a binary operator.  Make the expansion closer to the
14921 actual machine than just general_operand, which would allow 3 separate
14922 memory references (one output, two input) in a single insn.  */
14925 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14928 rtx src1, src2, dst, op, clob;
14930 dst = ix86_fixup_binary_operands (code, mode, operands);
14931 src1 = operands[1];
14932 src2 = operands[2];
14934 /* Emit the instruction. */
14936 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14937 if (reload_in_progress)
14939 /* Reload doesn't know about the flags register, and doesn't know that
14940 it doesn't want to clobber it. We can only do this with PLUS. */
14941 gcc_assert (code == PLUS);
14946 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14947 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14950 /* Fix up the destination if needed. */
14951 if (dst != operands[0])
14952 emit_move_insn (operands[0], dst);
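/* Example (illustrative): for a commutative PLUS with an immediate in
   operands[1], the operands are swapped so the immediate comes second;
   and if both sources are in memory, one is forced into a register
   first, since x86 insns allow at most one memory operand.  */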
14955 /* Return TRUE or FALSE depending on whether the binary operator meets the
14956 appropriate constraints. */
14959 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14962 rtx dst = operands[0];
14963 rtx src1 = operands[1];
14964 rtx src2 = operands[2];
14966 /* Both source operands cannot be in memory. */
14967 if (MEM_P (src1) && MEM_P (src2))
14970 /* Canonicalize operand order for commutative operators. */
14971 if (ix86_swap_binary_operands_p (code, mode, operands))
14978 /* If the destination is memory, we must have a matching source operand. */
14979 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14982 /* Source 1 cannot be a constant. */
14983 if (CONSTANT_P (src1))
14986 /* Source 1 cannot be a non-matching memory. */
14987 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14993 /* Attempt to expand a unary operator.  Make the expansion closer to the
14994 actual machine than just general_operand, which would allow 2 separate
14995 memory references (one output, one input) in a single insn.  */
14998 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15001 int matching_memory;
15002 rtx src, dst, op, clob;
15007 /* If the destination is memory, and we do not have matching source
15008 operands, do things in registers. */
15009 matching_memory = 0;
15012 if (rtx_equal_p (dst, src))
15013 matching_memory = 1;
15015 dst = gen_reg_rtx (mode);
15018 /* When the source operand is memory, the destination must match.  */
15019 if (MEM_P (src) && !matching_memory)
15020 src = force_reg (mode, src);
15022 /* Emit the instruction. */
15024 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15025 if (reload_in_progress || code == NOT)
15027 /* Reload doesn't know about the flags register, and doesn't know that
15028 it doesn't want to clobber it. */
15029 gcc_assert (code == NOT);
15034 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15035 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15038 /* Fix up the destination if needed. */
15039 if (dst != operands[0])
15040 emit_move_insn (operands[0], dst);
15043 /* Split 32bit/64bit divmod with 8bit unsigned divmod if the dividend and
15044 divisor are within the range [0-255].  */
15047 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15050 rtx end_label, qimode_label;
15051 rtx insn, div, mod;
15052 rtx scratch, tmp0, tmp1, tmp2;
15053 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15054 rtx (*gen_zero_extend) (rtx, rtx);
15055 rtx (*gen_test_ccno_1) (rtx, rtx);
15060 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15061 gen_test_ccno_1 = gen_testsi_ccno_1;
15062 gen_zero_extend = gen_zero_extendqisi2;
15065 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15066 gen_test_ccno_1 = gen_testdi_ccno_1;
15067 gen_zero_extend = gen_zero_extendqidi2;
15070 gcc_unreachable ();
15073 end_label = gen_label_rtx ();
15074 qimode_label = gen_label_rtx ();
15076 scratch = gen_reg_rtx (mode);
15078 /* Use 8bit unsigned divmod if the dividend and divisor are within
15079 the range [0-255].  */
15080 emit_move_insn (scratch, operands[2]);
15081 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15082 scratch, 1, OPTAB_DIRECT);
15083 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15084 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15085 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15086 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15087 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15089 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15090 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15091 JUMP_LABEL (insn) = qimode_label;
15093 /* Generate the original signed/unsigned divmod.  */
15094 div = gen_divmod4_1 (operands[0], operands[1],
15095 operands[2], operands[3]);
15098 /* Branch to the end. */
15099 emit_jump_insn (gen_jump (end_label));
15102 /* Generate 8bit unsigned divide. */
15103 emit_label (qimode_label);
15104 /* Don't use operands[0] for the result of the 8bit divide since not
15105 all registers support QImode ZERO_EXTRACT.  */
15106 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15107 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15108 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15109 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15113 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15114 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15118 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15119 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15122 /* Extract remainder from AH. */
15123 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15124 if (REG_P (operands[1]))
15125 insn = emit_move_insn (operands[1], tmp1);
15128 /* Need a new scratch register since the old one already holds the result of the 8-bit divide.  */
15130 scratch = gen_reg_rtx (mode);
15131 emit_move_insn (scratch, tmp1);
15132 insn = emit_move_insn (operands[1], scratch);
15134 set_unique_reg_note (insn, REG_EQUAL, mod);
15136 /* Zero extend quotient from AL. */
15137 tmp1 = gen_lowpart (QImode, tmp0);
15138 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15139 set_unique_reg_note (insn, REG_EQUAL, div);
15141 emit_label (end_label);
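/* A minimal standalone sketch (plain C, not GCC RTL) of what this
   splitter does at run time; the IOR plus the test against -0x100
   above is the "both operands fit in 8 bits" check.  Guarded out of
   the build -- illustration only.  */
#if 0
static unsigned int
divmod_fast_path (unsigned int num, unsigned int den, unsigned int *rem)
{
  if (((num | den) & ~0xffu) == 0)      /* both within [0, 255]?  */
    {
      /* One 8-bit unsigned divide produces quotient (AL) and
         remainder (AH) at once; for operands in [0, 255] the signed
         and unsigned results agree.  */
      unsigned char q = (unsigned char) num / (unsigned char) den;
      *rem = (unsigned char) num % (unsigned char) den;
      return q;
    }
  *rem = num % den;                     /* full-width divide */
  return num / den;
}
#endif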
15144 #define LEA_SEARCH_THRESHOLD 12
15146 /* Search backward for non-agu definition of register number REGNO1
15147 or register number REGNO2 in INSN's basic block until
15148 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15149 2. Reach BB boundary, or
15150 3. Reach agu definition.
15151 Returns the distance between the non-agu definition point and INSN.
15152 If no definition point is found, returns -1.  */
15155 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15158 basic_block bb = BLOCK_FOR_INSN (insn);
15161 enum attr_type insn_type;
15163 if (insn != BB_HEAD (bb))
15165 rtx prev = PREV_INSN (insn);
15166 while (prev && distance < LEA_SEARCH_THRESHOLD)
15168 if (NONDEBUG_INSN_P (prev))
15171 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15172 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15173 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15174 && (regno1 == DF_REF_REGNO (*def_rec)
15175 || regno2 == DF_REF_REGNO (*def_rec)))
15177 insn_type = get_attr_type (prev);
15178 if (insn_type != TYPE_LEA)
15182 if (prev == BB_HEAD (bb))
15184 prev = PREV_INSN (prev);
15188 if (distance < LEA_SEARCH_THRESHOLD)
15192 bool simple_loop = false;
15194 FOR_EACH_EDGE (e, ei, bb->preds)
15197 simple_loop = true;
15203 rtx prev = BB_END (bb);
15206 && distance < LEA_SEARCH_THRESHOLD)
15208 if (NONDEBUG_INSN_P (prev))
15211 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15212 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15213 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15214 && (regno1 == DF_REF_REGNO (*def_rec)
15215 || regno2 == DF_REF_REGNO (*def_rec)))
15217 insn_type = get_attr_type (prev);
15218 if (insn_type != TYPE_LEA)
15222 prev = PREV_INSN (prev);
15230 /* get_attr_type may modify recog data. We want to make sure
15231 that recog data is valid for instruction INSN, on which
15232 distance_non_agu_define is called. INSN is unchanged here. */
15233 extract_insn_cached (insn);
15237 /* Return the distance between INSN and the next insn that uses
15238 register number REGNO0 in a memory address.  Return -1 if no such
15239 use is found within LEA_SEARCH_THRESHOLD insns or if REGNO0 is set.  */
15242 distance_agu_use (unsigned int regno0, rtx insn)
15244 basic_block bb = BLOCK_FOR_INSN (insn);
15249 if (insn != BB_END (bb))
15251 rtx next = NEXT_INSN (insn);
15252 while (next && distance < LEA_SEARCH_THRESHOLD)
15254 if (NONDEBUG_INSN_P (next))
15258 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15259 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15260 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15261 && regno0 == DF_REF_REGNO (*use_rec))
15263 /* Return DISTANCE if OP0 is used in memory
15264 address in NEXT. */
15268 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15269 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15270 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15271 && regno0 == DF_REF_REGNO (*def_rec))
15273 /* Return -1 if OP0 is set in NEXT. */
15277 if (next == BB_END (bb))
15279 next = NEXT_INSN (next);
15283 if (distance < LEA_SEARCH_THRESHOLD)
15287 bool simple_loop = false;
15289 FOR_EACH_EDGE (e, ei, bb->succs)
15292 simple_loop = true;
15298 rtx next = BB_HEAD (bb);
15301 && distance < LEA_SEARCH_THRESHOLD)
15303 if (NONDEBUG_INSN_P (next))
15307 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15308 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15309 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15310 && regno0 == DF_REF_REGNO (*use_rec))
15312 /* Return DISTANCE if OP0 is used in memory
15313 address in NEXT. */
15317 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15318 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15319 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15320 && regno0 == DF_REF_REGNO (*def_rec))
15322 /* Return -1 if OP0 is set in NEXT. */
15327 next = NEXT_INSN (next);
15335 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15336 there is a choice between LEA and ADD.
15337 Negative value: ADD is preferred over LEA.
15338 Zero: neutral.
15339 Positive value: LEA is preferred over ADD.  */
15340 #define IX86_LEA_PRIORITY 2
15342 /* Return true if it is ok to optimize an ADD operation to a LEA
15343 operation to avoid flag register consumption.  For most processors,
15344 ADD is faster than LEA.  For processors like Atom, if the
15345 destination register of the LEA holds an actual address which will be
15346 used soon, LEA is better; otherwise ADD is better.  */
15349 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15351 unsigned int regno0 = true_regnum (operands[0]);
15352 unsigned int regno1 = true_regnum (operands[1]);
15353 unsigned int regno2 = true_regnum (operands[2]);
15355 /* If a = b + c, (a != b && a != c), we must use the lea form.  */
15356 if (regno0 != regno1 && regno0 != regno2)
15359 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15363 int dist_define, dist_use;
15365 /* Return false if REGNO0 isn't used in a memory address.  */
15366 dist_use = distance_agu_use (regno0, insn);
15370 dist_define = distance_non_agu_define (regno1, regno2, insn);
15371 if (dist_define <= 0)
15374 /* If this insn has both a backward non-agu dependence and a forward
15375 agu dependence, the one with the shorter distance takes effect.  */
15376 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
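/* The decision above, distilled into a sketch (assuming the two
   distance helpers defined earlier; the real code also checks
   TARGET_OPT_AGU, optimize_function_for_size_p and the operand
   registers):  */
#if 0
static int
lea_profitable_p (int dist_define, int dist_use)
{
  if (dist_use <= 0)      /* result never feeds an address calculation */
    return 0;             /* a plain ADD is fine */
  if (dist_define <= 0)   /* inputs were not just produced by the ALU */
    return 1;             /* LEA runs on the AGU without a stall */
  /* Both dependences exist: the closer one wins, with a bias of
     IX86_LEA_PRIORITY (2) in favor of LEA.  */
  return dist_define + 2 >= dist_use;
}
#endif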
15383 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY.  */
15387 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15393 /* Retrieve destination of SET_BODY. */
15394 switch (GET_CODE (set_body))
15397 set_dest = SET_DEST (set_body);
15398 if (!set_dest || !REG_P (set_dest))
15402 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15403 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15411 /* Retrieve shift count of USE_BODY. */
15412 switch (GET_CODE (use_body))
15415 shift_rtx = XEXP (use_body, 1);
15418 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15419 if (ix86_dep_by_shift_count_body (set_body,
15420 XVECEXP (use_body, 0, i)))
15428 && (GET_CODE (shift_rtx) == ASHIFT
15429 || GET_CODE (shift_rtx) == LSHIFTRT
15430 || GET_CODE (shift_rtx) == ASHIFTRT
15431 || GET_CODE (shift_rtx) == ROTATE
15432 || GET_CODE (shift_rtx) == ROTATERT))
15434 rtx shift_count = XEXP (shift_rtx, 1);
15436 /* Return true if shift count is dest of SET_BODY. */
15437 if (REG_P (shift_count)
15438 && true_regnum (set_dest) == true_regnum (shift_count))
15445 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN.  */
15449 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15451 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15452 PATTERN (use_insn));
15455 /* Return TRUE or FALSE depending on whether the unary operator meets the
15456 appropriate constraints. */
15459 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15460 enum machine_mode mode ATTRIBUTE_UNUSED,
15461 rtx operands[2] ATTRIBUTE_UNUSED)
15463 /* If one of operands is memory, source and destination must match. */
15464 if ((MEM_P (operands[0])
15465 || MEM_P (operands[1]))
15466 && ! rtx_equal_p (operands[0], operands[1]))
15471 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15472 are ok, keeping in mind the possible movddup alternative. */
15475 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15477 if (MEM_P (operands[0]))
15478 return rtx_equal_p (operands[0], operands[1 + high]);
15479 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15480 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15484 /* Post-reload splitter for converting an SF or DFmode value in an
15485 SSE register into an unsigned SImode. */
15488 ix86_split_convert_uns_si_sse (rtx operands[])
15490 enum machine_mode vecmode;
15491 rtx value, large, zero_or_two31, input, two31, x;
15493 large = operands[1];
15494 zero_or_two31 = operands[2];
15495 input = operands[3];
15496 two31 = operands[4];
15497 vecmode = GET_MODE (large);
15498 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15500 /* Load up the value into the low element. We must ensure that the other
15501 elements are valid floats -- zero is the easiest such value. */
15504 if (vecmode == V4SFmode)
15505 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15507 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15511 input = gen_rtx_REG (vecmode, REGNO (input));
15512 emit_move_insn (value, CONST0_RTX (vecmode));
15513 if (vecmode == V4SFmode)
15514 emit_insn (gen_sse_movss (value, value, input));
15516 emit_insn (gen_sse2_movsd (value, value, input));
15519 emit_move_insn (large, two31);
15520 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15522 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15523 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15525 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15526 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15528 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15529 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15531 large = gen_rtx_REG (V4SImode, REGNO (large));
15532 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15534 x = gen_rtx_REG (V4SImode, REGNO (value));
15535 if (vecmode == V4SFmode)
15536 emit_insn (gen_sse2_cvttps2dq (x, value));
15538 emit_insn (gen_sse2_cvttpd2dq (x, value));
15541 emit_insn (gen_xorv4si3 (value, value, large));
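/* Scalar equivalent of the SSE sequence above (a sketch assuming
   truncating conversion semantics and an input already in [0, 2**32)):
   values below 2**31 convert directly; larger ones have 2**31
   subtracted before the signed convert, and the sign bit is XORed
   back in afterwards -- the shifted "large" mask above.  */
#if 0
static unsigned int
double_to_uns32 (double d)
{
  if (d >= 2147483648.0)
    return (unsigned int) (int) (d - 2147483648.0) ^ 0x80000000u;
  return (unsigned int) (int) d;
}
#endif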
15544 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15545 Expects the 64-bit DImode to be supplied in a pair of integral
15546 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15547 -mfpmath=sse, !optimize_size only. */
15550 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15552 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15553 rtx int_xmm, fp_xmm;
15554 rtx biases, exponents;
15557 int_xmm = gen_reg_rtx (V4SImode);
15558 if (TARGET_INTER_UNIT_MOVES)
15559 emit_insn (gen_movdi_to_sse (int_xmm, input));
15560 else if (TARGET_SSE_SPLIT_REGS)
15562 emit_clobber (int_xmm);
15563 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15567 x = gen_reg_rtx (V2DImode);
15568 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15569 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15572 x = gen_rtx_CONST_VECTOR (V4SImode,
15573 gen_rtvec (4, GEN_INT (0x43300000UL),
15574 GEN_INT (0x45300000UL),
15575 const0_rtx, const0_rtx));
15576 exponents = validize_mem (force_const_mem (V4SImode, x));
15578 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15579 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15581 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15582 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15583 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15584 (0x1.0p84 + double(fp_value_hi_xmm)).
15585 Note these exponents differ by 32. */
15587 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15589 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15590 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15591 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15592 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15593 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15594 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15595 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15596 biases = validize_mem (force_const_mem (V2DFmode, biases));
15597 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15599 /* Add the upper and lower DFmode values together. */
15601 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15604 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15605 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15606 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15609 ix86_expand_vector_extract (false, target, fp_xmm, 0);
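/* The exponent trick above, written out in scalar C99 (a sketch
   assuming IEEE doubles and GCC-style union type punning): ORing the
   two 32-bit halves under the 0x433 and 0x453 exponent fields
   manufactures 0x1.0p52 + lo and 0x1.0p84 + hi * 2**32 exactly;
   subtracting the biases is exact too, so only the final add
   rounds.  */
#if 0
static double
uns64_to_double (unsigned long long u)
{
  union { unsigned long long i; double d; } lo, hi;
  lo.i = 0x4330000000000000ULL | (u & 0xffffffffULL);
  hi.i = 0x4530000000000000ULL | (u >> 32);
  return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
}
#endif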
15612 /* Not used, but eases macroization of patterns. */
15614 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15615 rtx input ATTRIBUTE_UNUSED)
15617 gcc_unreachable ();
15620 /* Convert an unsigned SImode value into a DFmode. Only currently used
15621 for SSE, but applicable anywhere. */
15624 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15626 REAL_VALUE_TYPE TWO31r;
15629 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15630 NULL, 1, OPTAB_DIRECT);
15632 fp = gen_reg_rtx (DFmode);
15633 emit_insn (gen_floatsidf2 (fp, x));
15635 real_ldexp (&TWO31r, &dconst1, 31);
15636 x = const_double_from_real_value (TWO31r, DFmode);
15638 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15640 emit_move_insn (target, x);
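/* Scalar form of the expansion above (a sketch; the unsigned-to-int
   cast assumes two's-complement wraparound, as on x86): re-bias the
   operand into signed range, use the signed converter we do have,
   then add 2**31 back -- both steps are exact in DFmode.  */
#if 0
static double
uns32_to_double (unsigned int u)
{
  return (double) (int) (u - 0x80000000u) + 2147483648.0;
}
#endif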
15643 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15644 32-bit mode; otherwise we have a direct convert instruction. */
15647 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15649 REAL_VALUE_TYPE TWO32r;
15650 rtx fp_lo, fp_hi, x;
15652 fp_lo = gen_reg_rtx (DFmode);
15653 fp_hi = gen_reg_rtx (DFmode);
15655 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15657 real_ldexp (&TWO32r, &dconst1, 32);
15658 x = const_double_from_real_value (TWO32r, DFmode);
15659 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15661 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15663 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15666 emit_move_insn (target, x);
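/* Scalar form of the split above (a sketch assuming an arithmetic
   right shift for the signed high part, as GCC provides): the scale
   by 2**32 is exact, so only the final add rounds.  */
#if 0
static double
int64_to_double (long long v)
{
  double hi = (double) (int) (v >> 32);    /* signed high word */
  double lo = (double) (unsigned int) v;   /* unsigned low word */
  return hi * 4294967296.0 + lo;
}
#endif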
15669 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15670 For x86_32, -mfpmath=sse, !optimize_size only. */
15672 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15674 REAL_VALUE_TYPE ONE16r;
15675 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15677 real_ldexp (&ONE16r, &dconst1, 16);
15678 x = const_double_from_real_value (ONE16r, SFmode);
15679 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15680 NULL, 0, OPTAB_DIRECT);
15681 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15682 NULL, 0, OPTAB_DIRECT);
15683 fp_hi = gen_reg_rtx (SFmode);
15684 fp_lo = gen_reg_rtx (SFmode);
15685 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15686 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15687 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15689 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15691 if (!rtx_equal_p (target, fp_hi))
15692 emit_move_insn (target, fp_hi);
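/* Scalar form of the halving trick above (a sketch): each 16-bit half
   converts to SFmode exactly, the scale by 2**16 is exact, and only
   the final add rounds, so the result is the correctly rounded
   conversion of the full 32-bit value.  */
#if 0
static float
uns32_to_float (unsigned int u)
{
  float hi = (float) (int) (u >> 16);
  float lo = (float) (int) (u & 0xffff);
  return hi * 65536.0f + lo;
}
#endif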
15695 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15696 then replicate the value for all elements of the vector register.  */
15700 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15707 v = gen_rtvec (4, value, value, value, value);
15708 return gen_rtx_CONST_VECTOR (V4SImode, v);
15712 v = gen_rtvec (2, value, value);
15713 return gen_rtx_CONST_VECTOR (V2DImode, v);
15717 v = gen_rtvec (4, value, value, value, value);
15719 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15720 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15721 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15725 v = gen_rtvec (2, value, value);
15727 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15728 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15731 gcc_unreachable ();
15735 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15736 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15737 for an SSE register. If VECT is true, then replicate the mask for
15738 all elements of the vector register. If INVERT is true, then create
15739 a mask excluding the sign bit. */
15742 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15744 enum machine_mode vec_mode, imode;
15745 HOST_WIDE_INT hi, lo;
15750 /* Find the sign bit, sign extended to 2*HWI. */
15756 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15757 lo = 0x80000000, hi = lo < 0;
15763 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15764 if (HOST_BITS_PER_WIDE_INT >= 64)
15765 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15767 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15772 vec_mode = VOIDmode;
15773 if (HOST_BITS_PER_WIDE_INT >= 64)
15776 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15783 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15787 lo = ~lo, hi = ~hi;
15793 mask = immed_double_const (lo, hi, imode);
15795 vec = gen_rtvec (2, v, mask);
15796 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15797 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15804 gcc_unreachable ();
15808 lo = ~lo, hi = ~hi;
15810 /* Force this value into the low part of a fp vector constant. */
15811 mask = immed_double_const (lo, hi, imode);
15812 mask = gen_lowpart (mode, mask);
15814 if (vec_mode == VOIDmode)
15815 return force_reg (mode, mask);
15817 v = ix86_build_const_vector (mode, vect, mask);
15818 return force_reg (vec_mode, v);
15821 /* Generate code for floating point ABS or NEG. */
15824 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15827 rtx mask, set, use, clob, dst, src;
15828 bool use_sse = false;
15829 bool vector_mode = VECTOR_MODE_P (mode);
15830 enum machine_mode elt_mode = mode;
15834 elt_mode = GET_MODE_INNER (mode);
15837 else if (mode == TFmode)
15839 else if (TARGET_SSE_MATH)
15840 use_sse = SSE_FLOAT_MODE_P (mode);
15842 /* NEG and ABS performed with SSE use bitwise mask operations.
15843 Create the appropriate mask now. */
15845 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15854 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15855 set = gen_rtx_SET (VOIDmode, dst, set);
15860 set = gen_rtx_fmt_e (code, mode, src);
15861 set = gen_rtx_SET (VOIDmode, dst, set);
15864 use = gen_rtx_USE (VOIDmode, mask);
15865 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15866 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15867 gen_rtvec (3, set, use, clob)));
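/* What the emitted mask operations compute, shown on a scalar (a
   sketch assuming IEEE single format and union type punning): NEG is
   an XOR with the sign-bit mask, ABS an AND with its complement;
   neither touches the flags, which is why only the non-vector path
   above needs the CLOBBER.  */
#if 0
static float
sse_neg (float x)
{
  union { float f; unsigned int i; } u = { x };
  u.i ^= 0x80000000u;     /* flip the sign bit */
  return u.f;
}

static float
sse_abs (float x)
{
  union { float f; unsigned int i; } u = { x };
  u.i &= 0x7fffffffu;     /* clear the sign bit */
  return u.f;
}
#endif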
15874 /* Expand a copysign operation. Special case operand 0 being a constant. */
15877 ix86_expand_copysign (rtx operands[])
15879 enum machine_mode mode;
15880 rtx dest, op0, op1, mask, nmask;
15882 dest = operands[0];
15886 mode = GET_MODE (dest);
15888 if (GET_CODE (op0) == CONST_DOUBLE)
15890 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15892 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15893 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15895 if (mode == SFmode || mode == DFmode)
15897 enum machine_mode vmode;
15899 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15901 if (op0 == CONST0_RTX (mode))
15902 op0 = CONST0_RTX (vmode);
15905 rtx v = ix86_build_const_vector (mode, false, op0);
15907 op0 = force_reg (vmode, v);
15910 else if (op0 != CONST0_RTX (mode))
15911 op0 = force_reg (mode, op0);
15913 mask = ix86_build_signbit_mask (mode, 0, 0);
15915 if (mode == SFmode)
15916 copysign_insn = gen_copysignsf3_const;
15917 else if (mode == DFmode)
15918 copysign_insn = gen_copysigndf3_const;
15920 copysign_insn = gen_copysigntf3_const;
15922 emit_insn (copysign_insn (dest, op0, op1, mask));
15926 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15928 nmask = ix86_build_signbit_mask (mode, 0, 1);
15929 mask = ix86_build_signbit_mask (mode, 0, 0);
15931 if (mode == SFmode)
15932 copysign_insn = gen_copysignsf3_var;
15933 else if (mode == DFmode)
15934 copysign_insn = gen_copysigndf3_var;
15936 copysign_insn = gen_copysigntf3_var;
15938 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15942 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15943 be a constant, and so has already been expanded into a vector constant. */
15946 ix86_split_copysign_const (rtx operands[])
15948 enum machine_mode mode, vmode;
15949 rtx dest, op0, mask, x;
15951 dest = operands[0];
15953 mask = operands[3];
15955 mode = GET_MODE (dest);
15956 vmode = GET_MODE (mask);
15958 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15959 x = gen_rtx_AND (vmode, dest, mask);
15960 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15962 if (op0 != CONST0_RTX (vmode))
15964 x = gen_rtx_IOR (vmode, dest, op0);
15965 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15969 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15970 so we have to do two masks. */
15973 ix86_split_copysign_var (rtx operands[])
15975 enum machine_mode mode, vmode;
15976 rtx dest, scratch, op0, op1, mask, nmask, x;
15978 dest = operands[0];
15979 scratch = operands[1];
15982 nmask = operands[4];
15983 mask = operands[5];
15985 mode = GET_MODE (dest);
15986 vmode = GET_MODE (mask);
15988 if (rtx_equal_p (op0, op1))
15990 /* Shouldn't happen often (it's useless, obviously), but when it does
15991 we'd generate incorrect code if we continue below. */
15992 emit_move_insn (dest, op0);
15996 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15998 gcc_assert (REGNO (op1) == REGNO (scratch));
16000 x = gen_rtx_AND (vmode, scratch, mask);
16001 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16004 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16005 x = gen_rtx_NOT (vmode, dest);
16006 x = gen_rtx_AND (vmode, x, op0);
16007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16011 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16013 x = gen_rtx_AND (vmode, scratch, mask);
16015 else /* alternative 2,4 */
16017 gcc_assert (REGNO (mask) == REGNO (scratch));
16018 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16019 x = gen_rtx_AND (vmode, scratch, op1);
16021 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16023 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16025 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16026 x = gen_rtx_AND (vmode, dest, nmask);
16028 else /* alternative 3,4 */
16030 gcc_assert (REGNO (nmask) == REGNO (dest));
16032 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16033 x = gen_rtx_AND (vmode, dest, op0);
16035 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16038 x = gen_rtx_IOR (vmode, dest, scratch);
16039 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
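/* The two-mask dance above reduces to one scalar identity (a sketch
   assuming IEEE doubles and union type punning): keep the magnitude
   bits of the first operand via NMASK (~sign) and merge in the sign
   bit of the second via MASK.  */
#if 0
static double
copysign_by_masks (double mag, double sgn)
{
  union { double d; unsigned long long i; } a = { mag }, b = { sgn };
  const unsigned long long sign = 0x8000000000000000ULL;
  a.i = (a.i & ~sign) | (b.i & sign);
  return a.d;
}
#endif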
16042 /* Return TRUE or FALSE depending on whether the first SET in INSN
16043 has source and destination with matching CC modes, and whether the
16044 CC mode is at least as constrained as REQ_MODE.  */
16047 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16050 enum machine_mode set_mode;
16052 set = PATTERN (insn);
16053 if (GET_CODE (set) == PARALLEL)
16054 set = XVECEXP (set, 0, 0);
16055 gcc_assert (GET_CODE (set) == SET);
16056 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16058 set_mode = GET_MODE (SET_DEST (set));
16062 if (req_mode != CCNOmode
16063 && (req_mode != CCmode
16064 || XEXP (SET_SRC (set), 1) != const0_rtx))
16068 if (req_mode == CCGCmode)
16072 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16076 if (req_mode == CCZmode)
16087 gcc_unreachable ();
16090 return GET_MODE (SET_SRC (set)) == set_mode;
16093 /* Generate insn patterns to do an integer compare of OPERANDS. */
16096 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16098 enum machine_mode cmpmode;
16101 cmpmode = SELECT_CC_MODE (code, op0, op1);
16102 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16104 /* This is very simple, but making the interface the same as in the
16105 FP case makes the rest of the code easier. */
16106 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16107 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16109 /* Return the test that should be put into the flags user, i.e.
16110 the bcc, scc, or cmov instruction. */
16111 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16114 /* Figure out whether to use ordered or unordered fp comparisons.
16115 Return the appropriate mode to use. */
16118 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16120 /* ??? In order to make all comparisons reversible, we do all comparisons
16121 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16122 between trapping and nontrapping comparisons, we can make inequality
16123 comparisons trapping again, since it results in better code when using
16124 FCOM based compares. */
16125 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16129 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16131 enum machine_mode mode = GET_MODE (op0);
16133 if (SCALAR_FLOAT_MODE_P (mode))
16135 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16136 return ix86_fp_compare_mode (code);
16141 /* Only zero flag is needed. */
16142 case EQ: /* ZF=0 */
16143 case NE: /* ZF!=0 */
16145 /* Codes needing carry flag. */
16146 case GEU: /* CF=0 */
16147 case LTU: /* CF=1 */
16148 /* Detect overflow checks. They need just the carry flag. */
16149 if (GET_CODE (op0) == PLUS
16150 && rtx_equal_p (op1, XEXP (op0, 0)))
16154 case GTU: /* CF=0 & ZF=0 */
16155 case LEU: /* CF=1 | ZF=1 */
16156 /* Detect overflow checks. They need just the carry flag. */
16157 if (GET_CODE (op0) == MINUS
16158 && rtx_equal_p (op1, XEXP (op0, 0)))
16162 /* Codes possibly doable only with sign flag when
16163 comparing against zero. */
16164 case GE: /* SF=OF or SF=0 */
16165 case LT: /* SF<>OF or SF=1 */
16166 if (op1 == const0_rtx)
16169 /* For other cases Carry flag is not required. */
16171 /* Codes doable only with the sign flag when comparing
16172 against zero, but for which we lack a jump instruction,
16173 so we need to use relational tests against the overflow
16174 flag, which thus needs to be zero.  */
16175 case GT: /* ZF=0 & SF=OF */
16176 case LE: /* ZF=1 | SF<>OF */
16177 if (op1 == const0_rtx)
16181 /* The strcmp pattern does (use flags), and combine may ask us for a proper mode.  */
16186 gcc_unreachable ();
16190 /* Return the fixed registers used for condition codes. */
16193 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16200 /* If two condition code modes are compatible, return a condition code
16201 mode which is compatible with both. Otherwise, return
16204 static enum machine_mode
16205 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16210 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16213 if ((m1 == CCGCmode && m2 == CCGOCmode)
16214 || (m1 == CCGOCmode && m2 == CCGCmode))
16220 gcc_unreachable ();
16250 /* These are only compatible with themselves, which we already know.  */
16257 /* Return a comparison we can do that is equivalent to
16258 swap_condition (code), except possibly for orderedness.
16259 Never change orderedness if TARGET_IEEE_FP, returning
16260 UNKNOWN in that case if necessary.  */
16262 static enum rtx_code
16263 ix86_fp_swap_condition (enum rtx_code code)
16267 case GT: /* GTU - CF=0 & ZF=0 */
16268 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16269 case GE: /* GEU - CF=0 */
16270 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16271 case UNLT: /* LTU - CF=1 */
16272 return TARGET_IEEE_FP ? UNKNOWN : GT;
16273 case UNLE: /* LEU - CF=1 | ZF=1 */
16274 return TARGET_IEEE_FP ? UNKNOWN : GE;
16276 return swap_condition (code);
16280 /* Return the cost of comparison CODE using the best strategy for performance.
16281 All of the following functions use the number of instructions as a cost metric.
16282 In the future this should be tweaked to compute bytes for optimize_size and
16283 take into account the performance of various instructions on various CPUs.  */
16286 ix86_fp_comparison_cost (enum rtx_code code)
16290 /* The cost of code using bit-twiddling on %ah. */
16307 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16311 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16314 gcc_unreachable ();
16317 switch (ix86_fp_comparison_strategy (code))
16319 case IX86_FPCMP_COMI:
16320 return arith_cost > 4 ? 3 : 2;
16321 case IX86_FPCMP_SAHF:
16322 return arith_cost > 4 ? 4 : 3;
16328 /* Return the strategy to use for floating-point comparisons.  We assume that fcomi is always
16329 preferable where available, since that is also true when looking at size
16330 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16332 enum ix86_fpcmp_strategy
16333 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16335 /* Do fcomi/sahf based test when profitable. */
16338 return IX86_FPCMP_COMI;
16340 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16341 return IX86_FPCMP_SAHF;
16343 return IX86_FPCMP_ARITH;
16346 /* Swap, force into registers, or otherwise massage the two operands
16347 to a fp comparison. The operands are updated in place; the new
16348 comparison code is returned. */
16350 static enum rtx_code
16351 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16353 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16354 rtx op0 = *pop0, op1 = *pop1;
16355 enum machine_mode op_mode = GET_MODE (op0);
16356 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16358 /* All of the unordered compare instructions only work on registers.
16359 The same is true of the fcomi compare instructions. The XFmode
16360 compare instructions require registers except when comparing
16361 against zero or when converting operand 1 from fixed point to floating point.  */
16365 && (fpcmp_mode == CCFPUmode
16366 || (op_mode == XFmode
16367 && ! (standard_80387_constant_p (op0) == 1
16368 || standard_80387_constant_p (op1) == 1)
16369 && GET_CODE (op1) != FLOAT)
16370 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16372 op0 = force_reg (op_mode, op0);
16373 op1 = force_reg (op_mode, op1);
16377 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16378 things around if they appear profitable, otherwise force op0
16379 into a register. */
16381 if (standard_80387_constant_p (op0) == 0
16383 && ! (standard_80387_constant_p (op1) == 0
16386 enum rtx_code new_code = ix86_fp_swap_condition (code);
16387 if (new_code != UNKNOWN)
16390 tmp = op0, op0 = op1, op1 = tmp;
16396 op0 = force_reg (op_mode, op0);
16398 if (CONSTANT_P (op1))
16400 int tmp = standard_80387_constant_p (op1);
16402 op1 = validize_mem (force_const_mem (op_mode, op1));
16406 op1 = force_reg (op_mode, op1);
16409 op1 = force_reg (op_mode, op1);
16413 /* Try to rearrange the comparison to make it cheaper. */
16414 if (ix86_fp_comparison_cost (code)
16415 > ix86_fp_comparison_cost (swap_condition (code))
16416 && (REG_P (op1) || can_create_pseudo_p ()))
16419 tmp = op0, op0 = op1, op1 = tmp;
16420 code = swap_condition (code);
16422 op0 = force_reg (op_mode, op0);
16430 /* Convert the comparison codes we use to represent FP comparisons to the
16431 integer code that will result in a proper branch.  Return UNKNOWN if no such code is available.  */
16435 ix86_fp_compare_code_to_integer (enum rtx_code code)
16464 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16467 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16469 enum machine_mode fpcmp_mode, intcmp_mode;
16472 fpcmp_mode = ix86_fp_compare_mode (code);
16473 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16475 /* Do fcomi/sahf based test when profitable. */
16476 switch (ix86_fp_comparison_strategy (code))
16478 case IX86_FPCMP_COMI:
16479 intcmp_mode = fpcmp_mode;
16480 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16481 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16486 case IX86_FPCMP_SAHF:
16487 intcmp_mode = fpcmp_mode;
16488 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16489 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16493 scratch = gen_reg_rtx (HImode);
16494 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16495 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16498 case IX86_FPCMP_ARITH:
16499 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16500 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16501 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16503 scratch = gen_reg_rtx (HImode);
16504 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16506 /* In the unordered case, we have to check C2 for NaNs, which
16507 doesn't happen to work out to anything nice combination-wise.
16508 So do some bit twiddling on the value we've got in AH to come
16509 up with an appropriate set of condition codes. */
16511 intcmp_mode = CCNOmode;
16516 if (code == GT || !TARGET_IEEE_FP)
16518 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16523 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16524 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16525 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16526 intcmp_mode = CCmode;
16532 if (code == LT && TARGET_IEEE_FP)
16534 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16535 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16536 intcmp_mode = CCmode;
16541 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16547 if (code == GE || !TARGET_IEEE_FP)
16549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16554 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16555 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16561 if (code == LE && TARGET_IEEE_FP)
16563 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16564 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16565 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16566 intcmp_mode = CCmode;
16571 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16577 if (code == EQ && TARGET_IEEE_FP)
16579 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16580 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16581 intcmp_mode = CCmode;
16586 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16592 if (code == NE && TARGET_IEEE_FP)
16594 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16595 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16607 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16616 gcc_unreachable ();
16624 /* Return the test that should be put into the flags user, i.e.
16625 the bcc, scc, or cmov instruction. */
16626 return gen_rtx_fmt_ee (code, VOIDmode,
16627 gen_rtx_REG (intcmp_mode, FLAGS_REG),
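/* Key to the %ah twiddling above: after fnstsw, the status word's
   condition bits land in AH as C0 = 0x01 (below), C2 = 0x04
   (unordered) and C3 = 0x40 (equal), so 0x45 = C0|C2|C3 and
   0x05 = C0|C2.  A sketch of the non-IEEE tests in those terms:  */
#if 0
static int fcom_gt (unsigned char ah) { return (ah & 0x45) == 0; }
static int fcom_ge (unsigned char ah) { return (ah & 0x05) == 0; }
static int fcom_eq (unsigned char ah) { return (ah & 0x40) != 0; }
static int fcom_unordered (unsigned char ah) { return (ah & 0x04) != 0; }
#endif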
16632 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16636 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16637 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16639 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16641 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16642 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16645 ret = ix86_expand_int_compare (code, op0, op1);
16651 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16653 enum machine_mode mode = GET_MODE (op0);
16665 tmp = ix86_expand_compare (code, op0, op1);
16666 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16667 gen_rtx_LABEL_REF (VOIDmode, label),
16669 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16676 /* Expand DImode branch into multiple compare+branch. */
16678 rtx lo[2], hi[2], label2;
16679 enum rtx_code code1, code2, code3;
16680 enum machine_mode submode;
16682 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16684 tmp = op0, op0 = op1, op1 = tmp;
16685 code = swap_condition (code);
16688 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16689 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16691 submode = mode == DImode ? SImode : DImode;
16693 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16694 avoid two branches. This costs one extra insn, so disable when
16695 optimizing for size. */
16697 if ((code == EQ || code == NE)
16698 && (!optimize_insn_for_size_p ()
16699 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16704 if (hi[1] != const0_rtx)
16705 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16706 NULL_RTX, 0, OPTAB_WIDEN);
16709 if (lo[1] != const0_rtx)
16710 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16711 NULL_RTX, 0, OPTAB_WIDEN);
16713 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16714 NULL_RTX, 0, OPTAB_WIDEN);
16716 ix86_expand_branch (code, tmp, const0_rtx, label);
16720 /* Otherwise, if we are doing a less-than or greater-or-equal-than
16721 comparison, op1 is a constant, and the low word is zero, then we can
16722 just examine the high word.  Similarly for low word -1 and
16723 less-or-equal-than or greater-than.  */
16725 if (CONST_INT_P (hi[1]))
16728 case LT: case LTU: case GE: case GEU:
16729 if (lo[1] == const0_rtx)
16731 ix86_expand_branch (code, hi[0], hi[1], label);
16735 case LE: case LEU: case GT: case GTU:
16736 if (lo[1] == constm1_rtx)
16738 ix86_expand_branch (code, hi[0], hi[1], label);
16746 /* Otherwise, we need two or three jumps. */
16748 label2 = gen_label_rtx ();
16751 code2 = swap_condition (code);
16752 code3 = unsigned_condition (code);
16756 case LT: case GT: case LTU: case GTU:
16759 case LE: code1 = LT; code2 = GT; break;
16760 case GE: code1 = GT; code2 = LT; break;
16761 case LEU: code1 = LTU; code2 = GTU; break;
16762 case GEU: code1 = GTU; code2 = LTU; break;
16764 case EQ: code1 = UNKNOWN; code2 = NE; break;
16765 case NE: code2 = UNKNOWN; break;
16768 gcc_unreachable ();
16773 * if (hi(a) < hi(b)) goto true;
16774 * if (hi(a) > hi(b)) goto false;
16775 * if (lo(a) < lo(b)) goto true;
16779 if (code1 != UNKNOWN)
16780 ix86_expand_branch (code1, hi[0], hi[1], label);
16781 if (code2 != UNKNOWN)
16782 ix86_expand_branch (code2, hi[0], hi[1], label2);
16784 ix86_expand_branch (code3, lo[0], lo[1], label);
16786 if (code2 != UNKNOWN)
16787 emit_label (label2);
16792 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16797 /* Split branch based on floating point condition. */
16799 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16800 rtx target1, rtx target2, rtx tmp, rtx pushed)
16805 if (target2 != pc_rtx)
16808 code = reverse_condition_maybe_unordered (code);
16813 condition = ix86_expand_fp_compare (code, op1, op2,
16816 /* Remove pushed operand from stack. */
16818 ix86_free_from_memory (GET_MODE (pushed));
16820 i = emit_jump_insn (gen_rtx_SET
16822 gen_rtx_IF_THEN_ELSE (VOIDmode,
16823 condition, target1, target2)));
16824 if (split_branch_probability >= 0)
16825 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16829 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16833 gcc_assert (GET_MODE (dest) == QImode);
16835 ret = ix86_expand_compare (code, op0, op1);
16836 PUT_MODE (ret, QImode);
16837 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16840 /* Expand a comparison setting or clearing the carry flag.  Return true when
16841 successful and set *POP to the comparison for the operation.  */
16843 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16845 enum machine_mode mode =
16846 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16848 /* Do not handle double-mode compares that go through a special path.  */
16849 if (mode == (TARGET_64BIT ? TImode : DImode))
16852 if (SCALAR_FLOAT_MODE_P (mode))
16854 rtx compare_op, compare_seq;
16856 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16858 /* Shortcut: the following common codes never translate
16859 into carry flag compares. */
16860 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16861 || code == ORDERED || code == UNORDERED)
16864 /* These comparisons require zero flag; swap operands so they won't. */
16865 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16866 && !TARGET_IEEE_FP)
16871 code = swap_condition (code);
16874 /* Try to expand the comparison and verify that we end up with
16875 a carry-flag based comparison.  This fails to be true only when
16876 we decide to expand the comparison using arithmetic, which is not
16877 a common scenario.  */
16879 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16880 compare_seq = get_insns ();
16883 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16884 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16885 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16887 code = GET_CODE (compare_op);
16889 if (code != LTU && code != GEU)
16892 emit_insn (compare_seq);
16897 if (!INTEGRAL_MODE_P (mode))
16906 /* Convert a==0 into (unsigned)a<1. */
16909 if (op1 != const0_rtx)
16912 code = (code == EQ ? LTU : GEU);
16915 /* Convert a>b into b<a or a>=b+1.  */
16918 if (CONST_INT_P (op1))
16920 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16921 /* Bail out on overflow.  We could still swap the operands, but that
16922 would force loading of the constant into a register.  */
16923 if (op1 == const0_rtx
16924 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16926 code = (code == GTU ? GEU : LTU);
16933 code = (code == GTU ? LTU : GEU);
16937 /* Convert a>=0 into (unsigned)a<0x80000000. */
16940 if (mode == DImode || op1 != const0_rtx)
16942 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16943 code = (code == LT ? GEU : LTU);
16947 if (mode == DImode || op1 != constm1_rtx)
16949 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16950 code = (code == LE ? GEU : LTU);
16956 /* Swapping operands may cause a constant to appear as the first operand.  */
16957 if (!nonimmediate_operand (op0, VOIDmode))
16959 if (!can_create_pseudo_p ())
16961 op0 = force_reg (mode, op0);
16963 *pop = ix86_expand_compare (code, op0, op1);
16964 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
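/* Examples of the rewrites performed above (a sketch): each rewritten
   form is answered by the carry flag alone after a single compare.  */
#if 0
static int eq0 (unsigned int a) { return a < 1u; }       /* a == 0 -> LTU */
static int gt42 (unsigned int a) { return a >= 43u; }    /* a > 42 -> GEU */
static int ge0 (int a)                                   /* a >= 0 -> LTU */
{ return (unsigned int) a < 0x80000000u; }
#endif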
16969 ix86_expand_int_movcc (rtx operands[])
16971 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16972 rtx compare_seq, compare_op;
16973 enum machine_mode mode = GET_MODE (operands[0]);
16974 bool sign_bit_compare_p = false;
16975 rtx op0 = XEXP (operands[1], 0);
16976 rtx op1 = XEXP (operands[1], 1);
16979 compare_op = ix86_expand_compare (code, op0, op1);
16980 compare_seq = get_insns ();
16983 compare_code = GET_CODE (compare_op);
16985 if ((op1 == const0_rtx && (code == GE || code == LT))
16986 || (op1 == constm1_rtx && (code == GT || code == LE)))
16987 sign_bit_compare_p = true;
16989 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16990 HImode insns, we'd be swallowed in word prefix ops. */
16992 if ((mode != HImode || TARGET_FAST_PREFIX)
16993 && (mode != (TARGET_64BIT ? TImode : DImode))
16994 && CONST_INT_P (operands[2])
16995 && CONST_INT_P (operands[3]))
16997 rtx out = operands[0];
16998 HOST_WIDE_INT ct = INTVAL (operands[2]);
16999 HOST_WIDE_INT cf = INTVAL (operands[3]);
17000 HOST_WIDE_INT diff;
17003 /* Sign bit compares are better done using shifts than by using sbb.  */
17005 if (sign_bit_compare_p
17006 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17008 /* Detect overlap between destination and compare sources. */
17011 if (!sign_bit_compare_p)
17014 bool fpcmp = false;
17016 compare_code = GET_CODE (compare_op);
17018 flags = XEXP (compare_op, 0);
17020 if (GET_MODE (flags) == CCFPmode
17021 || GET_MODE (flags) == CCFPUmode)
17025 = ix86_fp_compare_code_to_integer (compare_code);
17028 /* To simplify the rest of the code, restrict to the GEU case.  */
17029 if (compare_code == LTU)
17031 HOST_WIDE_INT tmp = ct;
17034 compare_code = reverse_condition (compare_code);
17035 code = reverse_condition (code);
17040 PUT_CODE (compare_op,
17041 reverse_condition_maybe_unordered
17042 (GET_CODE (compare_op)));
17044 PUT_CODE (compare_op,
17045 reverse_condition (GET_CODE (compare_op)));
17049 if (reg_overlap_mentioned_p (out, op0)
17050 || reg_overlap_mentioned_p (out, op1))
17051 tmp = gen_reg_rtx (mode);
17053 if (mode == DImode)
17054 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17056 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17057 flags, compare_op));
17061 if (code == GT || code == GE)
17062 code = reverse_condition (code);
17065 HOST_WIDE_INT tmp = ct;
17070 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17083 tmp = expand_simple_binop (mode, PLUS,
17085 copy_rtx (tmp), 1, OPTAB_DIRECT);
17096 tmp = expand_simple_binop (mode, IOR,
17098 copy_rtx (tmp), 1, OPTAB_DIRECT);
17100 else if (diff == -1 && ct)
17110 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17112 tmp = expand_simple_binop (mode, PLUS,
17113 copy_rtx (tmp), GEN_INT (cf),
17114 copy_rtx (tmp), 1, OPTAB_DIRECT);
17122 * andl cf - ct, dest
17132 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17135 tmp = expand_simple_binop (mode, AND,
17137 gen_int_mode (cf - ct, mode),
17138 copy_rtx (tmp), 1, OPTAB_DIRECT);
17140 tmp = expand_simple_binop (mode, PLUS,
17141 copy_rtx (tmp), GEN_INT (ct),
17142 copy_rtx (tmp), 1, OPTAB_DIRECT);
17145 if (!rtx_equal_p (tmp, out))
17146 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17153 enum machine_mode cmp_mode = GET_MODE (op0);
17156 tmp = ct, ct = cf, cf = tmp;
17159 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17161 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17163 /* We may be reversing an unordered compare to a normal compare, which
17164 is not valid in general (we may convert a non-trapping condition
17165 into a trapping one); however, on i386 we currently emit all
17166 comparisons unordered.  */
17167 compare_code = reverse_condition_maybe_unordered (compare_code);
17168 code = reverse_condition_maybe_unordered (code);
17172 compare_code = reverse_condition (compare_code);
17173 code = reverse_condition (code);
17177 compare_code = UNKNOWN;
17178 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17179 && CONST_INT_P (op1))
17181 if (op1 == const0_rtx
17182 && (code == LT || code == GE))
17183 compare_code = code;
17184 else if (op1 == constm1_rtx)
17188 else if (code == GT)
17193 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17194 if (compare_code != UNKNOWN
17195 && GET_MODE (op0) == GET_MODE (out)
17196 && (cf == -1 || ct == -1))
17198 /* If the lea code below could be used, only optimize
17199 if it results in a 2-insn sequence.  */
17201 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17202 || diff == 3 || diff == 5 || diff == 9)
17203 || (compare_code == LT && ct == -1)
17204 || (compare_code == GE && cf == -1))
17207 * notl op1 (if necessary)
17215 code = reverse_condition (code);
17218 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17220 out = expand_simple_binop (mode, IOR,
17222 out, 1, OPTAB_DIRECT);
17223 if (out != operands[0])
17224 emit_move_insn (operands[0], out);
17231 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17232 || diff == 3 || diff == 5 || diff == 9)
17233 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17235 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17241 * lea cf(dest*(ct-cf)),dest
17245 * This also catches the degenerate setcc-only case.
17251 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17254 /* On x86_64 the lea instruction operates on Pmode, so we need
17255 to get the arithmetic done in the proper mode to match.  */
17257 tmp = copy_rtx (out);
17261 out1 = copy_rtx (out);
17262 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17266 tmp = gen_rtx_PLUS (mode, tmp, out1);
17272 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17275 if (!rtx_equal_p (tmp, out))
17278 out = force_operand (tmp, copy_rtx (out));
17280 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17282 if (!rtx_equal_p (out, operands[0]))
17283 emit_move_insn (operands[0], copy_rtx (out));
17289 * General case: Jumpful:
17290 * xorl dest,dest cmpl op1, op2
17291 * cmpl op1, op2 movl ct, dest
17292 * setcc dest jcc 1f
17293 * decl dest movl cf, dest
17294 * andl (cf-ct),dest 1:
17297 * Size 20. Size 14.
17299 * This is reasonably steep, but branch mispredict costs are
17300 * high on modern CPUs, so consider failing only if optimizing for space.  */
17304 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17305 && BRANCH_COST (optimize_insn_for_speed_p (),
17310 enum machine_mode cmp_mode = GET_MODE (op0);
17315 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17317 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17319 /* We may be reversing an unordered compare to a normal compare,
17320 which is not valid in general (we may convert a non-trapping
17321 condition into a trapping one); however, on i386 we currently
17322 emit all comparisons unordered.  */
17323 code = reverse_condition_maybe_unordered (code);
17327 code = reverse_condition (code);
17328 if (compare_code != UNKNOWN)
17329 compare_code = reverse_condition (compare_code);
17333 if (compare_code != UNKNOWN)
17335 /* notl op1 (if needed)
17340 For x < 0 (resp. x <= -1) there will be no notl,
17341 so if possible swap the constants to get rid of the complement.
17343 True/false will be -1/0 while code below (store flag
17344 followed by decrement) is 0/-1, so the constants need
17345 to be exchanged once more. */
17347 if (compare_code == GE || !cf)
17349 code = reverse_condition (code);
17354 HOST_WIDE_INT tmp = cf;
17359 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17363 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17365 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17367 copy_rtx (out), 1, OPTAB_DIRECT);
17370 out = expand_simple_binop (mode, AND, copy_rtx (out),
17371 gen_int_mode (cf - ct, mode),
17372 copy_rtx (out), 1, OPTAB_DIRECT);
17374 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17375 copy_rtx (out), 1, OPTAB_DIRECT);
17376 if (!rtx_equal_p (out, operands[0]))
17377 emit_move_insn (operands[0], copy_rtx (out));
17383 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17385 /* Try a few things more with specific constants and a variable. */
17388 rtx var, orig_out, out, tmp;
17390 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17393 /* If one of the two operands is an interesting constant, load a
17394 constant using the sequence above and mask the variable in with a logical operation.  */
17396 if (CONST_INT_P (operands[2]))
17399 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17400 operands[3] = constm1_rtx, op = and_optab;
17401 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17402 operands[3] = const0_rtx, op = ior_optab;
17406 else if (CONST_INT_P (operands[3]))
17409 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17410 operands[2] = constm1_rtx, op = and_optab;
17411 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
17412 operands[2] = const0_rtx, op = ior_optab;
17419 orig_out = operands[0];
17420 tmp = gen_reg_rtx (mode);
17423 /* Recurse to get the constant loaded. */
17424 if (ix86_expand_int_movcc (operands) == 0)
17427 /* Mask in the interesting variable. */
17428 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17430 if (!rtx_equal_p (out, orig_out))
17431 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17437 * For comparison with above,
17447 if (! nonimmediate_operand (operands[2], mode))
17448 operands[2] = force_reg (mode, operands[2]);
17449 if (! nonimmediate_operand (operands[3], mode))
17450 operands[3] = force_reg (mode, operands[3]);
17452 if (! register_operand (operands[2], VOIDmode)
17454 || ! register_operand (operands[3], VOIDmode)))
17455 operands[2] = force_reg (mode, operands[2]);
17458 && ! register_operand (operands[3], VOIDmode))
17459 operands[3] = force_reg (mode, operands[3]);
17461 emit_insn (compare_seq);
17462 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17463 gen_rtx_IF_THEN_ELSE (mode,
17464 compare_op, operands[2],
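/* The branchless constant/constant selection above, in scalar form (a
   sketch using unsigned arithmetic so the wraparound is well defined):
   sbb -- or setcc followed by a decrement -- materializes a 0 / -1
   mask, which then selects between CT and CF without a jump.  */
#if 0
static int
select_branchless (int cond, int ct, int cf)
{
  unsigned int mask = 0u - (cond != 0);   /* 0 or 0xffffffff */
  return (int) ((((unsigned int) ct - (unsigned int) cf) & mask)
                + (unsigned int) cf);
}
#endif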
17469 /* Swap, force into registers, or otherwise massage the two operands
17470 to an sse comparison with a mask result. Thus we differ a bit from
17471 ix86_prepare_fp_compare_args which expects to produce a flags result.
17473 The DEST operand exists to help determine whether to commute commutative
17474 operators. The POP0/POP1 operands are updated in place. The new
17475 comparison code is returned, or UNKNOWN if not implementable. */
17477 static enum rtx_code
17478 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17479 rtx *pop0, rtx *pop1)
17487 /* We have no LTGT as an operator. We could implement it with
17488 NE & ORDERED, but this requires an extra temporary. It's
17489 not clear that it's worth it. */
17496 /* These are supported directly. */
17503 /* For commutative operators, try to canonicalize the destination
17504 operand to be first in the comparison - this helps reload to
17505 avoid extra moves. */
17506 if (!dest || !rtx_equal_p (dest, *pop1))
17514 /* These are not supported directly. Swap the comparison operands
17515 to transform into something that is supported. */
17519 code = swap_condition (code);
17523 gcc_unreachable ();
17529 /* Detect conditional moves that exactly match min/max operational
17530 semantics. Note that this is IEEE safe, as long as we don't
17531 interchange the operands.
17533 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17534 and TRUE if the operation is successful and instructions are emitted. */
17537 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17538 rtx cmp_op1, rtx if_true, rtx if_false)
17540 enum machine_mode mode;
17546 else if (code == UNGE)
17549 if_true = if_false;
17555 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17557 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17562 mode = GET_MODE (dest);
17564 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17565 but MODE may be a vector mode and thus not appropriate. */
17566 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17568 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17571 if_true = force_reg (mode, if_true);
17572 v = gen_rtvec (2, if_true, if_false);
17573 tmp = gen_rtx_UNSPEC (mode, v, u);
17577 code = is_min ? SMIN : SMAX;
17578 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17581 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17585 /* Expand an sse vector comparison. Return the register with the result. */
17588 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17589 rtx op_true, rtx op_false)
17591 enum machine_mode mode = GET_MODE (dest);
17594 cmp_op0 = force_reg (mode, cmp_op0);
17595 if (!nonimmediate_operand (cmp_op1, mode))
17596 cmp_op1 = force_reg (mode, cmp_op1);
17599 || reg_overlap_mentioned_p (dest, op_true)
17600 || reg_overlap_mentioned_p (dest, op_false))
17601 dest = gen_reg_rtx (mode);
17603 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17604 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17609 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17610 operations. This is used for both scalar and vector conditional moves. */
17613 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17615 enum machine_mode mode = GET_MODE (dest);
17618 if (op_false == CONST0_RTX (mode))
17620 op_true = force_reg (mode, op_true);
17621 x = gen_rtx_AND (mode, cmp, op_true);
17622 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17624 else if (op_true == CONST0_RTX (mode))
17626 op_false = force_reg (mode, op_false);
17627 x = gen_rtx_NOT (mode, cmp);
17628 x = gen_rtx_AND (mode, x, op_false);
17629 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17631 else if (TARGET_XOP)
17633 rtx pcmov = gen_rtx_SET (mode, dest,
17634 gen_rtx_IF_THEN_ELSE (mode, cmp,
17641 op_true = force_reg (mode, op_true);
17642 op_false = force_reg (mode, op_false);
17644 t2 = gen_reg_rtx (mode);
17646 t3 = gen_reg_rtx (mode);
17650 x = gen_rtx_AND (mode, op_true, cmp);
17651 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17653 x = gen_rtx_NOT (mode, cmp);
17654 x = gen_rtx_AND (mode, x, op_false);
17655 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17657 x = gen_rtx_IOR (mode, t3, t2);
17658 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
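/* Per-lane effect of the AND/ANDNOT/IOR fallback above (a sketch):
   CMP is all-ones or all-zeros in each lane, as the SSE compares
   guarantee, so the blend is a pure bitwise select.  */
#if 0
static unsigned int
lane_select (unsigned int cmp, unsigned int op_true, unsigned int op_false)
{
  return (op_true & cmp) | (op_false & ~cmp);
}
#endif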
17662 /* Expand a floating-point conditional move. Return true if successful. */
17665 ix86_expand_fp_movcc (rtx operands[])
17667 enum machine_mode mode = GET_MODE (operands[0]);
17668 enum rtx_code code = GET_CODE (operands[1]);
17669 rtx tmp, compare_op;
17670 rtx op0 = XEXP (operands[1], 0);
17671 rtx op1 = XEXP (operands[1], 1);
17673 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17675 enum machine_mode cmode;
17677 /* Since we have no cmove for sse registers, don't force bad register
17678 allocation just to gain access to it. Deny movcc when the
17679 comparison mode doesn't match the move mode. */
17680 cmode = GET_MODE (op0);
17681 if (cmode == VOIDmode)
17682 cmode = GET_MODE (op1);
17686 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17687 if (code == UNKNOWN)
17690 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17691 operands[2], operands[3]))
17694 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17695 operands[2], operands[3]);
17696 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17700 /* The floating point conditional move instructions don't directly
17701 support conditions resulting from a signed integer comparison. */
17703 compare_op = ix86_expand_compare (code, op0, op1);
17704 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17706 tmp = gen_reg_rtx (QImode);
17707 ix86_expand_setcc (tmp, code, op0, op1);
17709 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17712 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17713 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17714 operands[2], operands[3])));
17719 /* Expand a floating-point vector conditional move; a vcond operation
17720 rather than a movcc operation. */
17723 ix86_expand_fp_vcond (rtx operands[])
17725 enum rtx_code code = GET_CODE (operands[3]);
17728 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17729 &operands[4], &operands[5]);
17730 if (code == UNKNOWN)
17733 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17734 operands[5], operands[1], operands[2]))
17737 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17738 operands[1], operands[2]);
17739 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17743 /* Expand a signed/unsigned integral vector conditional move. */
17746 ix86_expand_int_vcond (rtx operands[])
17748 enum machine_mode mode = GET_MODE (operands[0]);
17749 enum rtx_code code = GET_CODE (operands[3]);
17750 bool negate = false;
17753 cop0 = operands[4];
17754 cop1 = operands[5];
17756 /* XOP supports all of the comparisons on all vector int types. */
17759 /* Canonicalize the comparison to EQ, GT, GTU. */
17770 code = reverse_condition (code);
17776 code = reverse_condition (code);
17782 code = swap_condition (code);
17783 x = cop0, cop0 = cop1, cop1 = x;
17787 gcc_unreachable ();
17790 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17791 if (mode == V2DImode)
17796 /* SSE4.1 supports EQ. */
17797 if (!TARGET_SSE4_1)
17803 /* SSE4.2 supports GT/GTU. */
17804 if (!TARGET_SSE4_2)
17809 gcc_unreachable ();
17813 /* Unsigned parallel compare is not supported by the hardware.
17814 Play some tricks to turn this into a signed comparison against 0. */
17818 cop0 = force_reg (mode, cop0);
17826 rtx (*gen_sub3) (rtx, rtx, rtx);
17828 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17830 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17832 gen_sub3 = (mode == V4SImode
17833 ? gen_subv4si3 : gen_subv2di3);
17834 t1 = gen_reg_rtx (mode);
17835 emit_insn (gen_sub3 (t1, cop0, mask));
17837 t2 = gen_reg_rtx (mode);
17838 emit_insn (gen_sub3 (t2, cop1, mask));
17848 /* Perform a parallel unsigned saturating subtraction. */
17849 x = gen_reg_rtx (mode);
17850 emit_insn (gen_rtx_SET (VOIDmode, x,
17851 gen_rtx_US_MINUS (mode, cop0, cop1)));
17854 cop1 = CONST0_RTX (mode);
17860 gcc_unreachable ();
17865 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17866 operands[1+negate], operands[2-negate]);
17868 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17869 operands[2-negate]);
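/* Illustrative sketch, not part of the original file: the two tricks
   used above to get GTU without a hardware unsigned compare, in scalar
   C (function names are hypothetical).  Biasing both operands by
   INT_MIN turns an unsigned compare into a signed one; a saturating
   subtraction (as PSUBUS does per element) is nonzero exactly when
   a is unsigned-greater than b.  */

static int
gtu_via_bias_sketch (unsigned int a, unsigned int b)
{
  /* a - 0x80000000 flips the sign bit, so the signed order of the
     biased values matches the unsigned order of the originals.  */
  return (int) (a - 0x80000000u) > (int) (b - 0x80000000u);
}

static int
gtu_via_ussub_sketch (unsigned char a, unsigned char b)
{
  unsigned char diff = a > b ? a - b : 0;  /* unsigned saturating a - b */
  return diff != 0;                        /* nonzero iff a >u b */
}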
17873 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17874 true if we should do zero extension, else sign extension. HIGH_P is
17875 true if we want the N/2 high elements, else the low elements. */
17878 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17880 enum machine_mode imode = GET_MODE (operands[1]);
17881 rtx (*unpack)(rtx, rtx, rtx);
17888 unpack = gen_vec_interleave_highv16qi;
17890 unpack = gen_vec_interleave_lowv16qi;
17894 unpack = gen_vec_interleave_highv8hi;
17896 unpack = gen_vec_interleave_lowv8hi;
17900 unpack = gen_vec_interleave_highv4si;
17902 unpack = gen_vec_interleave_lowv4si;
17905 gcc_unreachable ();
17908 dest = gen_lowpart (imode, operands[0]);
17911 se = force_reg (imode, CONST0_RTX (imode));
17913 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17914 operands[1], pc_rtx, pc_rtx);
17916 emit_insn (unpack (dest, operands[1], se));
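/* Illustrative sketch, not part of the original file: unpacking widens
   each element by interleaving it with a second vector that supplies
   the new high half - all zeros for zero extension, or a per-element
   sign mask (computed above as 0 > x) for sign extension.  Scalar C
   analogue for one byte lane (the function name is hypothetical):  */

static short
unpack_lane_sketch (signed char x, bool unsigned_p)
{
  /* The interleave partner provides the high byte of the widened lane.  */
  unsigned char high = unsigned_p ? 0 : (x < 0 ? 0xff : 0);
  return (short) ((high << 8) | (unsigned char) x);
}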
17919 /* This function performs the same task as ix86_expand_sse_unpack,
17920 but with SSE4.1 instructions. */
17923 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17925 enum machine_mode imode = GET_MODE (operands[1]);
17926 rtx (*unpack)(rtx, rtx);
17933 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17935 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17939 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17941 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17945 unpack = gen_sse4_1_zero_extendv2siv2di2;
17947 unpack = gen_sse4_1_sign_extendv2siv2di2;
17950 gcc_unreachable ();
17953 dest = operands[0];
17956 /* Shift the higher 8 bytes into the lower 8 bytes. */
17957 src = gen_reg_rtx (imode);
17958 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17959 gen_lowpart (V1TImode, operands[1]),
17965 emit_insn (unpack (dest, src));
17968 /* Expand conditional increment or decrement using adc/sbb instructions.
17969 The default case using setcc followed by the conditional move can be
17970 done by generic code. */
17972 ix86_expand_int_addcc (rtx operands[])
17974 enum rtx_code code = GET_CODE (operands[1]);
17976 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17978 rtx val = const0_rtx;
17979 bool fpcmp = false;
17980 enum machine_mode mode;
17981 rtx op0 = XEXP (operands[1], 0);
17982 rtx op1 = XEXP (operands[1], 1);
17984 if (operands[3] != const1_rtx
17985 && operands[3] != constm1_rtx)
17987 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17989 code = GET_CODE (compare_op);
17991 flags = XEXP (compare_op, 0);
17993 if (GET_MODE (flags) == CCFPmode
17994 || GET_MODE (flags) == CCFPUmode)
17997 code = ix86_fp_compare_code_to_integer (code);
18004 PUT_CODE (compare_op,
18005 reverse_condition_maybe_unordered
18006 (GET_CODE (compare_op)));
18008 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18011 mode = GET_MODE (operands[0]);
18013 /* Construct either adc or sbb insn. */
18014 if ((code == LTU) == (operands[3] == constm1_rtx))
18019 insn = gen_subqi3_carry;
18022 insn = gen_subhi3_carry;
18025 insn = gen_subsi3_carry;
18028 insn = gen_subdi3_carry;
18031 gcc_unreachable ();
18039 insn = gen_addqi3_carry;
18042 insn = gen_addhi3_carry;
18045 insn = gen_addsi3_carry;
18048 insn = gen_adddi3_carry;
18051 gcc_unreachable ();
18054 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
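/* Illustrative sketch, not part of the original file: the adc/sbb
   expansion above yields a branchless conditional increment or
   decrement.  Scalar C analogue for the increment case (the function
   name is hypothetical):  */

static unsigned int
int_addcc_sketch (unsigned int op0, unsigned int op1, unsigned int x)
{
  /* "cmp op0, op1" sets the carry flag when op0 <u op1; "adc x, 0"
     then adds that carry, computing x + (op0 < op1) without a jump.  */
  return x + (op0 < op1);
}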
18060 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
18061 but works for floating-point parameters and non-offsettable memories.
18062 For pushes, it returns just stack offsets; the values will be saved
18063 in the right order.  At most four parts are generated. */
18066 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18071 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18073 size = (GET_MODE_SIZE (mode) + 4) / 8;
18075 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18076 gcc_assert (size >= 2 && size <= 4);
18078 /* Optimize constant pool reference to immediates. This is used by fp
18079 moves that force all constants to memory to allow combining. */
18080 if (MEM_P (operand) && MEM_READONLY_P (operand))
18082 rtx tmp = maybe_get_pool_constant (operand);
18087 if (MEM_P (operand) && !offsettable_memref_p (operand))
18089 /* The only non-offsettable memories we handle are pushes. */
18090 int ok = push_operand (operand, VOIDmode);
18094 operand = copy_rtx (operand);
18095 PUT_MODE (operand, Pmode);
18096 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18100 if (GET_CODE (operand) == CONST_VECTOR)
18102 enum machine_mode imode = int_mode_for_mode (mode);
18103 /* Caution: if we looked through a constant pool memory above,
18104 the operand may actually have a different mode now. That's
18105 ok, since we want to pun this all the way back to an integer. */
18106 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18107 gcc_assert (operand != NULL);
18113 if (mode == DImode)
18114 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18119 if (REG_P (operand))
18121 gcc_assert (reload_completed);
18122 for (i = 0; i < size; i++)
18123 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18125 else if (offsettable_memref_p (operand))
18127 operand = adjust_address (operand, SImode, 0);
18128 parts[0] = operand;
18129 for (i = 1; i < size; i++)
18130 parts[i] = adjust_address (operand, SImode, 4 * i);
18132 else if (GET_CODE (operand) == CONST_DOUBLE)
18137 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18141 real_to_target (l, &r, mode);
18142 parts[3] = gen_int_mode (l[3], SImode);
18143 parts[2] = gen_int_mode (l[2], SImode);
18146 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18147 parts[2] = gen_int_mode (l[2], SImode);
18150 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18153 gcc_unreachable ();
18155 parts[1] = gen_int_mode (l[1], SImode);
18156 parts[0] = gen_int_mode (l[0], SImode);
18159 gcc_unreachable ();
18164 if (mode == TImode)
18165 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18166 if (mode == XFmode || mode == TFmode)
18168 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18169 if (REG_P (operand))
18171 gcc_assert (reload_completed);
18172 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18173 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18175 else if (offsettable_memref_p (operand))
18177 operand = adjust_address (operand, DImode, 0);
18178 parts[0] = operand;
18179 parts[1] = adjust_address (operand, upper_mode, 8);
18181 else if (GET_CODE (operand) == CONST_DOUBLE)
18186 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18187 real_to_target (l, &r, mode);
18189 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18190 if (HOST_BITS_PER_WIDE_INT >= 64)
18193 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18194 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18197 parts[0] = immed_double_const (l[0], l[1], DImode);
18199 if (upper_mode == SImode)
18200 parts[1] = gen_int_mode (l[2], SImode);
18201 else if (HOST_BITS_PER_WIDE_INT >= 64)
18204 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18205 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18208 parts[1] = immed_double_const (l[2], l[3], DImode);
18211 gcc_unreachable ();
18218 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18219 Return false when normal moves are needed; true when all required
18220 insns have been emitted. Operands 2-4 contain the input values
18221 in the correct order; operands 5-7 contain the output values. */
18224 ix86_split_long_move (rtx operands[])
18229 int collisions = 0;
18230 enum machine_mode mode = GET_MODE (operands[0]);
18231 bool collisionparts[4];
18233 /* The DFmode expanders may ask us to move double.
18234 For a 64-bit target this is a single move.  By hiding the fact
18235 here we simplify i386.md splitters. */
18236 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18238 /* Optimize constant pool reference to immediates. This is used by
18239 fp moves that force all constants to memory to allow combining. */
18241 if (MEM_P (operands[1])
18242 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18243 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18244 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18245 if (push_operand (operands[0], VOIDmode))
18247 operands[0] = copy_rtx (operands[0]);
18248 PUT_MODE (operands[0], Pmode);
18251 operands[0] = gen_lowpart (DImode, operands[0]);
18252 operands[1] = gen_lowpart (DImode, operands[1]);
18253 emit_move_insn (operands[0], operands[1]);
18257 /* The only non-offsettable memory we handle is a push. */
18258 if (push_operand (operands[0], VOIDmode))
18261 gcc_assert (!MEM_P (operands[0])
18262 || offsettable_memref_p (operands[0]));
18264 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18265 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18267 /* When emitting a push, take care of source operands on the stack. */
18268 if (push && MEM_P (operands[1])
18269 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18271 rtx src_base = XEXP (part[1][nparts - 1], 0);
18273 /* Compensate for the stack decrement by 4. */
18274 if (!TARGET_64BIT && nparts == 3
18275 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18276 src_base = plus_constant (src_base, 4);
18278 /* src_base refers to the stack pointer and is
18279 automatically decreased by the emitted pushes. */
18280 for (i = 0; i < nparts; i++)
18281 part[1][i] = change_address (part[1][i],
18282 GET_MODE (part[1][i]), src_base);
18285 /* We need to do the copy in the right order in case an address register
18286 of the source overlaps the destination. */
18287 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18291 for (i = 0; i < nparts; i++)
18294 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18295 if (collisionparts[i])
18299 /* Collision in the middle part can be handled by reordering. */
18300 if (collisions == 1 && nparts == 3 && collisionparts [1])
18302 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18303 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18305 else if (collisions == 1
18307 && (collisionparts [1] || collisionparts [2]))
18309 if (collisionparts [1])
18311 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18312 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18316 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18317 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18321 /* If there are more collisions, we can't handle them by reordering.
18322 Do an lea to the last part and use only one colliding move. */
18323 else if (collisions > 1)
18329 base = part[0][nparts - 1];
18331 /* Handle the case when the last part isn't valid for lea.
18332 Happens in 64-bit mode storing the 12-byte XFmode. */
18333 if (GET_MODE (base) != Pmode)
18334 base = gen_rtx_REG (Pmode, REGNO (base));
18336 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18337 part[1][0] = replace_equiv_address (part[1][0], base);
18338 for (i = 1; i < nparts; i++)
18340 tmp = plus_constant (base, UNITS_PER_WORD * i);
18341 part[1][i] = replace_equiv_address (part[1][i], tmp);
18352 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18353 emit_insn (gen_addsi3 (stack_pointer_rtx,
18354 stack_pointer_rtx, GEN_INT (-4)));
18355 emit_move_insn (part[0][2], part[1][2]);
18357 else if (nparts == 4)
18359 emit_move_insn (part[0][3], part[1][3]);
18360 emit_move_insn (part[0][2], part[1][2]);
18365 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
18366 register, that is OK - we will just use the larger counterpart.  We also
18367 retype memory - this comes from an attempt to avoid a REX prefix on
18368 moving the second half of a TFmode value. */
18369 if (GET_MODE (part[1][1]) == SImode)
18371 switch (GET_CODE (part[1][1]))
18374 part[1][1] = adjust_address (part[1][1], DImode, 0);
18378 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18382 gcc_unreachable ();
18385 if (GET_MODE (part[1][0]) == SImode)
18386 part[1][0] = part[1][1];
18389 emit_move_insn (part[0][1], part[1][1]);
18390 emit_move_insn (part[0][0], part[1][0]);
18394 /* Choose the correct order so as not to overwrite the source before it is copied. */
18395 if ((REG_P (part[0][0])
18396 && REG_P (part[1][1])
18397 && (REGNO (part[0][0]) == REGNO (part[1][1])
18399 && REGNO (part[0][0]) == REGNO (part[1][2]))
18401 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18403 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18405 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18407 operands[2 + i] = part[0][j];
18408 operands[6 + i] = part[1][j];
18413 for (i = 0; i < nparts; i++)
18415 operands[2 + i] = part[0][i];
18416 operands[6 + i] = part[1][i];
18420 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18421 if (optimize_insn_for_size_p ())
18423 for (j = 0; j < nparts - 1; j++)
18424 if (CONST_INT_P (operands[6 + j])
18425 && operands[6 + j] != const0_rtx
18426 && REG_P (operands[2 + j]))
18427 for (i = j; i < nparts - 1; i++)
18428 if (CONST_INT_P (operands[7 + i])
18429 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18430 operands[7 + i] = operands[2 + j];
18433 for (i = 0; i < nparts; i++)
18434 emit_move_insn (operands[2 + i], operands[6 + i]);
18439 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18440 left shift by a constant, either using a single shift or
18441 a sequence of add instructions. */
18444 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18446 rtx (*insn)(rtx, rtx, rtx);
18449 || (count * ix86_cost->add <= ix86_cost->shift_const
18450 && !optimize_insn_for_size_p ()))
18452 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
18453 while (count-- > 0)
18454 emit_insn (insn (operand, operand, operand));
18458 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18459 emit_insn (insn (operand, operand, GEN_INT (count)));
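/* Illustrative sketch, not part of the original file: a left shift by
   COUNT equals COUNT doublings, and "add r,r" can be cheaper than a
   shift on some processors - the cost comparison above picks between
   the two forms.  In C (the function name is hypothetical):  */

static unsigned int
ashl_const_sketch (unsigned int operand, int count)
{
  while (count-- > 0)
    operand += operand;   /* same effect as operand <<= 1 */
  return operand;
}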
18464 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18466 rtx (*gen_ashl3)(rtx, rtx, rtx);
18467 rtx (*gen_shld)(rtx, rtx, rtx);
18468 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18470 rtx low[2], high[2];
18473 if (CONST_INT_P (operands[2]))
18475 split_double_mode (mode, operands, 2, low, high);
18476 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18478 if (count >= half_width)
18480 emit_move_insn (high[0], low[1]);
18481 emit_move_insn (low[0], const0_rtx);
18483 if (count > half_width)
18484 ix86_expand_ashl_const (high[0], count - half_width, mode);
18488 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18490 if (!rtx_equal_p (operands[0], operands[1]))
18491 emit_move_insn (operands[0], operands[1]);
18493 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
18494 ix86_expand_ashl_const (low[0], count, mode);
18499 split_double_mode (mode, operands, 1, low, high);
18501 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18503 if (operands[1] == const1_rtx)
18505 /* Assuming we've chosen QImode-capable registers, 1 << N
18506 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18507 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18509 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18511 ix86_expand_clear (low[0]);
18512 ix86_expand_clear (high[0]);
18513 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
18515 d = gen_lowpart (QImode, low[0]);
18516 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18517 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18518 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18520 d = gen_lowpart (QImode, high[0]);
18521 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18522 s = gen_rtx_NE (QImode, flags, const0_rtx);
18523 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18526 /* Otherwise, we can get the same results by manually performing
18527 a bit extract operation on bit 5/6, and then performing the two
18528 shifts. The two methods of getting 0/1 into low/high are exactly
18529 the same size. Avoiding the shift in the bit extract case helps
18530 pentium4 a bit; no one else seems to care much either way. */
18533 enum machine_mode half_mode;
18534 rtx (*gen_lshr3)(rtx, rtx, rtx);
18535 rtx (*gen_and3)(rtx, rtx, rtx);
18536 rtx (*gen_xor3)(rtx, rtx, rtx);
18537 HOST_WIDE_INT bits;
18540 if (mode == DImode)
18542 half_mode = SImode;
18543 gen_lshr3 = gen_lshrsi3;
18544 gen_and3 = gen_andsi3;
18545 gen_xor3 = gen_xorsi3;
18550 half_mode = DImode;
18551 gen_lshr3 = gen_lshrdi3;
18552 gen_and3 = gen_anddi3;
18553 gen_xor3 = gen_xordi3;
18557 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18558 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
18560 x = gen_lowpart (half_mode, operands[2]);
18561 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18563 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
18564 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
18565 emit_move_insn (low[0], high[0]);
18566 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
18569 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18570 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
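/* Illustrative sketch, not part of the original file: the bit-extract
   variant above computes a double-word 1 << N from half-word pieces.
   Bit 5 of N (bit 6 for the wider case) selects which half receives
   the set bit; both halves are then shifted by N modulo the half
   width, matching the hardware shifter.  C analogue for a 64-bit
   result built from 32-bit halves (the function name is hypothetical):  */

static unsigned long long
one_shl_sketch (unsigned int n)       /* N in 0..63 */
{
  unsigned int high = (n >> 5) & 1;   /* 1 iff N >= 32 */
  unsigned int low = high ^ 1;        /* the other half gets the bit */
  low <<= (n & 31);
  high <<= (n & 31);
  return ((unsigned long long) high << 32) | low;
}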
18574 if (operands[1] == constm1_rtx)
18576 /* For -1 << N, we can avoid the shld instruction, because we
18577 know that we're shifting 0...31/63 ones into a -1. */
18578 emit_move_insn (low[0], constm1_rtx);
18579 if (optimize_insn_for_size_p ())
18580 emit_move_insn (high[0], low[0]);
18582 emit_move_insn (high[0], constm1_rtx);
18586 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18588 if (!rtx_equal_p (operands[0], operands[1]))
18589 emit_move_insn (operands[0], operands[1]);
18591 split_double_mode (mode, operands, 1, low, high);
18592 emit_insn (gen_shld (high[0], low[0], operands[2]));
18595 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18597 if (TARGET_CMOVE && scratch)
18599 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18600 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18602 ix86_expand_clear (scratch);
18603 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
18607 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18608 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18610 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
18615 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18617 rtx (*gen_ashr3)(rtx, rtx, rtx)
18618 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
18619 rtx (*gen_shrd)(rtx, rtx, rtx);
18620 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18622 rtx low[2], high[2];
18625 if (CONST_INT_P (operands[2]))
18627 split_double_mode (mode, operands, 2, low, high);
18628 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18630 if (count == GET_MODE_BITSIZE (mode) - 1)
18632 emit_move_insn (high[0], high[1]);
18633 emit_insn (gen_ashr3 (high[0], high[0],
18634 GEN_INT (half_width - 1)));
18635 emit_move_insn (low[0], high[0]);
18638 else if (count >= half_width)
18640 emit_move_insn (low[0], high[1]);
18641 emit_move_insn (high[0], low[0]);
18642 emit_insn (gen_ashr3 (high[0], high[0],
18643 GEN_INT (half_width - 1)));
18645 if (count > half_width)
18646 emit_insn (gen_ashr3 (low[0], low[0],
18647 GEN_INT (count - half_width)));
18651 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18653 if (!rtx_equal_p (operands[0], operands[1]))
18654 emit_move_insn (operands[0], operands[1]);
18656 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18657 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
18662 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18664 if (!rtx_equal_p (operands[0], operands[1]))
18665 emit_move_insn (operands[0], operands[1]);
18667 split_double_mode (mode, operands, 1, low, high);
18669 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18670 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
18672 if (TARGET_CMOVE && scratch)
18674 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18675 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18677 emit_move_insn (scratch, high[0]);
18678 emit_insn (gen_ashr3 (scratch, scratch,
18679 GEN_INT (half_width - 1)));
18680 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18685 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
18686 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
18688 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
18694 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18696 rtx (*gen_lshr3)(rtx, rtx, rtx)
18697 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
18698 rtx (*gen_shrd)(rtx, rtx, rtx);
18699 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18701 rtx low[2], high[2];
18704 if (CONST_INT_P (operands[2]))
18706 split_double_mode (mode, operands, 2, low, high);
18707 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18709 if (count >= half_width)
18711 emit_move_insn (low[0], high[1]);
18712 ix86_expand_clear (high[0]);
18714 if (count > half_width)
18715 emit_insn (gen_lshr3 (low[0], low[0],
18716 GEN_INT (count - half_width)));
18720 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18722 if (!rtx_equal_p (operands[0], operands[1]))
18723 emit_move_insn (operands[0], operands[1]);
18725 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18726 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
18731 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18733 if (!rtx_equal_p (operands[0], operands[1]))
18734 emit_move_insn (operands[0], operands[1]);
18736 split_double_mode (mode, operands, 1, low, high);
18738 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18739 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
18741 if (TARGET_CMOVE && scratch)
18743 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18744 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18746 ix86_expand_clear (scratch);
18747 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18752 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18753 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18755 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
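/* Illustrative sketch, not part of the original file: the variable-count
   split above for a double-word logical right shift.  SHRD merges
   high-half bits into the low half; the adjustment step (cmove or
   branch) fixes up counts of half-width or more, which the hardware
   shifter reduces modulo the half width.  C analogue (the function
   name is hypothetical):  */

static unsigned long long
lshr_double_sketch (unsigned long long x, unsigned int n)  /* N in 0..63 */
{
  unsigned int lo = (unsigned int) x;
  unsigned int hi = (unsigned int) (x >> 32);
  unsigned int c = n & 31;

  if (c != 0)
    {
      lo = (lo >> c) | (hi << (32 - c));   /* shrd low, high, c */
      hi >>= c;                            /* shr high, c */
    }
  if (n & 32)                              /* the adjustment step */
    {
      lo = hi;
      hi = 0;
    }
  return ((unsigned long long) hi << 32) | lo;
}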
18760 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18762 predict_jump (int prob)
18764 rtx insn = get_last_insn ();
18765 gcc_assert (JUMP_P (insn));
18766 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18769 /* Helper function for the string operations below.  Test VARIABLE for
18770 whether it is aligned to VALUE bytes.  If so, jump to the label. */
18772 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18774 rtx label = gen_label_rtx ();
18775 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18776 if (GET_MODE (variable) == DImode)
18777 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18779 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18780 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18783 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18785 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18789 /* Decrease COUNTREG by VALUE. */
18791 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18793 rtx (*gen_add)(rtx, rtx, rtx)
18794 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
18796 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
18799 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
18801 ix86_zero_extend_to_Pmode (rtx exp)
18804 if (GET_MODE (exp) == VOIDmode)
18805 return force_reg (Pmode, exp);
18806 if (GET_MODE (exp) == Pmode)
18807 return copy_to_mode_reg (Pmode, exp);
18808 r = gen_reg_rtx (Pmode);
18809 emit_insn (gen_zero_extendsidi2 (r, exp));
18813 /* Divide COUNTREG by SCALE. */
18815 scale_counter (rtx countreg, int scale)
18821 if (CONST_INT_P (countreg))
18822 return GEN_INT (INTVAL (countreg) / scale);
18823 gcc_assert (REG_P (countreg));
18825 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18826 GEN_INT (exact_log2 (scale)),
18827 NULL, 1, OPTAB_DIRECT);
18831 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18832 DImode for constant loop counts. */
18834 static enum machine_mode
18835 counter_mode (rtx count_exp)
18837 if (GET_MODE (count_exp) != VOIDmode)
18838 return GET_MODE (count_exp);
18839 if (!CONST_INT_P (count_exp))
18841 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18846 /* When SRCPTR is non-NULL, output a simple loop to move memory
18847 from SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
18848 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
18849 output the equivalent loop to set memory by VALUE (supposed to be in MODE).
18851 The size is rounded down to a whole number of chunks moved at once.
18852 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
18856 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18857 rtx destptr, rtx srcptr, rtx value,
18858 rtx count, enum machine_mode mode, int unroll,
18861 rtx out_label, top_label, iter, tmp;
18862 enum machine_mode iter_mode = counter_mode (count);
18863 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18864 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18870 top_label = gen_label_rtx ();
18871 out_label = gen_label_rtx ();
18872 iter = gen_reg_rtx (iter_mode);
18874 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18875 NULL, 1, OPTAB_DIRECT);
18876 /* Those two should combine. */
18877 if (piece_size == const1_rtx)
18879 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18881 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18883 emit_move_insn (iter, const0_rtx);
18885 emit_label (top_label);
18887 tmp = convert_modes (Pmode, iter_mode, iter, true);
18888 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18889 destmem = change_address (destmem, mode, x_addr);
18893 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18894 srcmem = change_address (srcmem, mode, y_addr);
18896 /* When unrolling for chips that reorder memory reads and writes,
18897 we can save registers by using a single temporary.
18898 Using 4 temporaries is also overkill in 32-bit mode. */
18899 if (!TARGET_64BIT && 0)
18901 for (i = 0; i < unroll; i++)
18906 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18908 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18910 emit_move_insn (destmem, srcmem);
18916 gcc_assert (unroll <= 4);
18917 for (i = 0; i < unroll; i++)
18919 tmpreg[i] = gen_reg_rtx (mode);
18923 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18925 emit_move_insn (tmpreg[i], srcmem);
18927 for (i = 0; i < unroll; i++)
18932 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18934 emit_move_insn (destmem, tmpreg[i]);
18939 for (i = 0; i < unroll; i++)
18943 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18944 emit_move_insn (destmem, value);
18947 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18948 true, OPTAB_LIB_WIDEN);
18950 emit_move_insn (iter, tmp);
18952 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18954 if (expected_size != -1)
18956 expected_size /= GET_MODE_SIZE (mode) * unroll;
18957 if (expected_size == 0)
18959 else if (expected_size > REG_BR_PROB_BASE)
18960 predict_jump (REG_BR_PROB_BASE - 1);
18962 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18965 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18966 iter = ix86_zero_extend_to_Pmode (iter);
18967 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18968 true, OPTAB_LIB_WIDEN);
18969 if (tmp != destptr)
18970 emit_move_insn (destptr, tmp);
18973 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18974 true, OPTAB_LIB_WIDEN);
18976 emit_move_insn (srcptr, tmp);
18978 emit_label (out_label);
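/* Illustrative sketch, not part of the original file: the shape of the
   copy loop emitted above for a chunk mode of size S unrolled U times.
   The byte count is first rounded down to a multiple of S * U, the
   remainder being left to the caller's epilogue; the expander also
   advances DESTPTR/SRCPTR past the copied area, which this sketch
   omits (names are hypothetical).  */

static void
copy_loop_sketch (char *dest, const char *src, unsigned long count)
{
  enum { S = 4, U = 2 };    /* e.g. SImode chunks, unrolled twice */
  unsigned long size = count & ~(unsigned long) (S * U - 1);
  unsigned long iter;

  for (iter = 0; iter < size; iter += S * U)
    {
      __builtin_memcpy (dest + iter, src + iter, S);
      __builtin_memcpy (dest + iter + S, src + iter + S, S);
    }
}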
18981 /* Output a "rep; mov" instruction.
18982 Arguments have the same meaning as for the previous function. */
18984 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18985 rtx destptr, rtx srcptr,
18987 enum machine_mode mode)
18993 /* If the size is known, it is shorter to use rep movs. */
18994 if (mode == QImode && CONST_INT_P (count)
18995 && !(INTVAL (count) & 3))
18998 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18999 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19000 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19001 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19002 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19003 if (mode != QImode)
19005 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19006 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19007 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19008 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19009 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19010 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19014 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19015 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19017 if (CONST_INT_P (count))
19019 count = GEN_INT (INTVAL (count)
19020 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19021 destmem = shallow_copy_rtx (destmem);
19022 srcmem = shallow_copy_rtx (srcmem);
19023 set_mem_size (destmem, count);
19024 set_mem_size (srcmem, count);
19028 if (MEM_SIZE (destmem))
19029 set_mem_size (destmem, NULL_RTX);
19030 if (MEM_SIZE (srcmem))
19031 set_mem_size (srcmem, NULL_RTX);
19033 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
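/* Illustrative sketch, not part of the original file: the semantics of
   the "rep; mov" emitted above for a chunk of size S.  COUNTREG holds
   the byte count divided by S, and both pointers finish just past the
   copied region - which is what the DESTEXP/SRCEXP expressions built
   above describe (names are hypothetical).  */

static void
rep_mov_sketch (char **destp, const char **srcp, unsigned long countreg)
{
  enum { S = 4 };           /* e.g. rep movsd */
  while (countreg-- > 0)
    {
      __builtin_memcpy (*destp, *srcp, S);
      *destp += S;
      *srcp += S;
    }
}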
19037 /* Output a "rep; stos" instruction.
19038 Arguments have the same meaning as for the previous function. */
19040 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19041 rtx count, enum machine_mode mode,
19047 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19048 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19049 value = force_reg (mode, gen_lowpart (mode, value));
19050 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19051 if (mode != QImode)
19053 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19054 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19055 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19058 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19059 if (orig_value == const0_rtx && CONST_INT_P (count))
19061 count = GEN_INT (INTVAL (count)
19062 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19063 destmem = shallow_copy_rtx (destmem);
19064 set_mem_size (destmem, count);
19066 else if (MEM_SIZE (destmem))
19067 set_mem_size (destmem, NULL_RTX);
19068 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19072 emit_strmov (rtx destmem, rtx srcmem,
19073 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19075 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19076 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19077 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19080 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19082 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19083 rtx destptr, rtx srcptr, rtx count, int max_size)
19086 if (CONST_INT_P (count))
19088 HOST_WIDE_INT countval = INTVAL (count);
19091 if ((countval & 0x10) && max_size > 16)
19095 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19096 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19099 gcc_unreachable ();
19102 if ((countval & 0x08) && max_size > 8)
19105 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19108 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19109 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19113 if ((countval & 0x04) && max_size > 4)
19115 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19118 if ((countval & 0x02) && max_size > 2)
19120 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19123 if ((countval & 0x01) && max_size > 1)
19125 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19132 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19133 count, 1, OPTAB_DIRECT);
19134 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19135 count, QImode, 1, 4);
19139 /* When there are stringops, we can cheaply increase dest and src pointers.
19140 Otherwise we save code size by maintaining offset (zero is readily
19141 available from preceding rep operation) and using x86 addressing modes. */
19143 if (TARGET_SINGLE_STRINGOP)
19147 rtx label = ix86_expand_aligntest (count, 4, true);
19148 src = change_address (srcmem, SImode, srcptr);
19149 dest = change_address (destmem, SImode, destptr);
19150 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19151 emit_label (label);
19152 LABEL_NUSES (label) = 1;
19156 rtx label = ix86_expand_aligntest (count, 2, true);
19157 src = change_address (srcmem, HImode, srcptr);
19158 dest = change_address (destmem, HImode, destptr);
19159 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19160 emit_label (label);
19161 LABEL_NUSES (label) = 1;
19165 rtx label = ix86_expand_aligntest (count, 1, true);
19166 src = change_address (srcmem, QImode, srcptr);
19167 dest = change_address (destmem, QImode, destptr);
19168 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19169 emit_label (label);
19170 LABEL_NUSES (label) = 1;
19175 rtx offset = force_reg (Pmode, const0_rtx);
19180 rtx label = ix86_expand_aligntest (count, 4, true);
19181 src = change_address (srcmem, SImode, srcptr);
19182 dest = change_address (destmem, SImode, destptr);
19183 emit_move_insn (dest, src);
19184 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19185 true, OPTAB_LIB_WIDEN);
19187 emit_move_insn (offset, tmp);
19188 emit_label (label);
19189 LABEL_NUSES (label) = 1;
19193 rtx label = ix86_expand_aligntest (count, 2, true);
19194 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19195 src = change_address (srcmem, HImode, tmp);
19196 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19197 dest = change_address (destmem, HImode, tmp);
19198 emit_move_insn (dest, src);
19199 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19200 true, OPTAB_LIB_WIDEN);
19202 emit_move_insn (offset, tmp);
19203 emit_label (label);
19204 LABEL_NUSES (label) = 1;
19208 rtx label = ix86_expand_aligntest (count, 1, true);
19209 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19210 src = change_address (srcmem, QImode, tmp);
19211 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19212 dest = change_address (destmem, QImode, tmp);
19213 emit_move_insn (dest, src);
19214 emit_label (label);
19215 LABEL_NUSES (label) = 1;
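/* Illustrative sketch, not part of the original file: for a constant
   count, the epilogue above copies the tail "count & (max_size - 1)"
   with a cascade of power-of-two moves.  The tests below are resolved
   at expand time, so no runtime branches are emitted (the function
   name is hypothetical).  */

static void
tail_copy_sketch (char *dest, const char *src, unsigned long countval)
{
  unsigned long off = 0;
  if (countval & 8) { __builtin_memcpy (dest + off, src + off, 8); off += 8; }
  if (countval & 4) { __builtin_memcpy (dest + off, src + off, 4); off += 4; }
  if (countval & 2) { __builtin_memcpy (dest + off, src + off, 2); off += 2; }
  if (countval & 1) __builtin_memcpy (dest + off, src + off, 1);
}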
19220 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19222 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19223 rtx count, int max_size)
19226 expand_simple_binop (counter_mode (count), AND, count,
19227 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19228 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19229 gen_lowpart (QImode, value), count, QImode,
19233 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19235 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19239 if (CONST_INT_P (count))
19241 HOST_WIDE_INT countval = INTVAL (count);
19244 if ((countval & 0x10) && max_size > 16)
19248 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19249 emit_insn (gen_strset (destptr, dest, value));
19250 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19251 emit_insn (gen_strset (destptr, dest, value));
19254 gcc_unreachable ();
19257 if ((countval & 0x08) && max_size > 8)
19261 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19262 emit_insn (gen_strset (destptr, dest, value));
19266 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19267 emit_insn (gen_strset (destptr, dest, value));
19268 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19269 emit_insn (gen_strset (destptr, dest, value));
19273 if ((countval & 0x04) && max_size > 4)
19275 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19276 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19279 if ((countval & 0x02) && max_size > 2)
19281 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19282 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19285 if ((countval & 0x01) && max_size > 1)
19287 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19288 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19295 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19300 rtx label = ix86_expand_aligntest (count, 16, true);
19303 dest = change_address (destmem, DImode, destptr);
19304 emit_insn (gen_strset (destptr, dest, value));
19305 emit_insn (gen_strset (destptr, dest, value));
19309 dest = change_address (destmem, SImode, destptr);
19310 emit_insn (gen_strset (destptr, dest, value));
19311 emit_insn (gen_strset (destptr, dest, value));
19312 emit_insn (gen_strset (destptr, dest, value));
19313 emit_insn (gen_strset (destptr, dest, value));
19315 emit_label (label);
19316 LABEL_NUSES (label) = 1;
19320 rtx label = ix86_expand_aligntest (count, 8, true);
19323 dest = change_address (destmem, DImode, destptr);
19324 emit_insn (gen_strset (destptr, dest, value));
19328 dest = change_address (destmem, SImode, destptr);
19329 emit_insn (gen_strset (destptr, dest, value));
19330 emit_insn (gen_strset (destptr, dest, value));
19332 emit_label (label);
19333 LABEL_NUSES (label) = 1;
19337 rtx label = ix86_expand_aligntest (count, 4, true);
19338 dest = change_address (destmem, SImode, destptr);
19339 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19340 emit_label (label);
19341 LABEL_NUSES (label) = 1;
19345 rtx label = ix86_expand_aligntest (count, 2, true);
19346 dest = change_address (destmem, HImode, destptr);
19347 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19348 emit_label (label);
19349 LABEL_NUSES (label) = 1;
19353 rtx label = ix86_expand_aligntest (count, 1, true);
19354 dest = change_address (destmem, QImode, destptr);
19355 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19356 emit_label (label);
19357 LABEL_NUSES (label) = 1;
19361 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
19362 to DESIRED_ALIGNMENT. */
19364 expand_movmem_prologue (rtx destmem, rtx srcmem,
19365 rtx destptr, rtx srcptr, rtx count,
19366 int align, int desired_alignment)
19368 if (align <= 1 && desired_alignment > 1)
19370 rtx label = ix86_expand_aligntest (destptr, 1, false);
19371 srcmem = change_address (srcmem, QImode, srcptr);
19372 destmem = change_address (destmem, QImode, destptr);
19373 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19374 ix86_adjust_counter (count, 1);
19375 emit_label (label);
19376 LABEL_NUSES (label) = 1;
19378 if (align <= 2 && desired_alignment > 2)
19380 rtx label = ix86_expand_aligntest (destptr, 2, false);
19381 srcmem = change_address (srcmem, HImode, srcptr);
19382 destmem = change_address (destmem, HImode, destptr);
19383 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19384 ix86_adjust_counter (count, 2);
19385 emit_label (label);
19386 LABEL_NUSES (label) = 1;
19388 if (align <= 4 && desired_alignment > 4)
19390 rtx label = ix86_expand_aligntest (destptr, 4, false);
19391 srcmem = change_address (srcmem, SImode, srcptr);
19392 destmem = change_address (destmem, SImode, destptr);
19393 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19394 ix86_adjust_counter (count, 4);
19395 emit_label (label);
19396 LABEL_NUSES (label) = 1;
19398 gcc_assert (desired_alignment <= 8);
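/* Illustrative sketch, not part of the original file: the alignment
   prologue above as straight C.  Each step copies one small block,
   guarded by a runtime test of the destination address, until DEST
   reaches the desired alignment; COUNT shrinks by whatever was copied
   (names are hypothetical).  */

static void
align_prologue_sketch (char **destp, const char **srcp,
                       unsigned long *countp, int desired_alignment)
{
  if (desired_alignment > 1 && ((unsigned long) *destp & 1))
    { **destp = **srcp; *destp += 1; *srcp += 1; *countp -= 1; }
  if (desired_alignment > 2 && ((unsigned long) *destp & 2))
    { __builtin_memcpy (*destp, *srcp, 2); *destp += 2; *srcp += 2; *countp -= 2; }
  if (desired_alignment > 4 && ((unsigned long) *destp & 4))
    { __builtin_memcpy (*destp, *srcp, 4); *destp += 4; *srcp += 4; *countp -= 4; }
}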
19401 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
19402 ALIGN_BYTES is how many bytes need to be copied. */
19404 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19405 int desired_align, int align_bytes)
19408 rtx src_size, dst_size;
19410 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19411 if (src_align_bytes >= 0)
19412 src_align_bytes = desired_align - src_align_bytes;
19413 src_size = MEM_SIZE (src);
19414 dst_size = MEM_SIZE (dst);
19415 if (align_bytes & 1)
19417 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19418 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19420 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19422 if (align_bytes & 2)
19424 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19425 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19426 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19427 set_mem_align (dst, 2 * BITS_PER_UNIT);
19428 if (src_align_bytes >= 0
19429 && (src_align_bytes & 1) == (align_bytes & 1)
19430 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19431 set_mem_align (src, 2 * BITS_PER_UNIT);
19433 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19435 if (align_bytes & 4)
19437 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19438 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19439 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19440 set_mem_align (dst, 4 * BITS_PER_UNIT);
19441 if (src_align_bytes >= 0)
19443 unsigned int src_align = 0;
19444 if ((src_align_bytes & 3) == (align_bytes & 3))
19446 else if ((src_align_bytes & 1) == (align_bytes & 1))
19448 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19449 set_mem_align (src, src_align * BITS_PER_UNIT);
19452 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19454 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19455 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19456 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19457 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19458 if (src_align_bytes >= 0)
19460 unsigned int src_align = 0;
19461 if ((src_align_bytes & 7) == (align_bytes & 7))
19463 else if ((src_align_bytes & 3) == (align_bytes & 3))
19465 else if ((src_align_bytes & 1) == (align_bytes & 1))
19467 if (src_align > (unsigned int) desired_align)
19468 src_align = desired_align;
19469 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19470 set_mem_align (src, src_align * BITS_PER_UNIT);
19473 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19475 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
19480 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
19481 to DESIRED_ALIGNMENT. */
19483 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19484 int align, int desired_alignment)
19486 if (align <= 1 && desired_alignment > 1)
19488 rtx label = ix86_expand_aligntest (destptr, 1, false);
19489 destmem = change_address (destmem, QImode, destptr);
19490 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19491 ix86_adjust_counter (count, 1);
19492 emit_label (label);
19493 LABEL_NUSES (label) = 1;
19495 if (align <= 2 && desired_alignment > 2)
19497 rtx label = ix86_expand_aligntest (destptr, 2, false);
19498 destmem = change_address (destmem, HImode, destptr);
19499 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19500 ix86_adjust_counter (count, 2);
19501 emit_label (label);
19502 LABEL_NUSES (label) = 1;
19504 if (align <= 4 && desired_alignment > 4)
19506 rtx label = ix86_expand_aligntest (destptr, 4, false);
19507 destmem = change_address (destmem, SImode, destptr);
19508 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19509 ix86_adjust_counter (count, 4);
19510 emit_label (label);
19511 LABEL_NUSES (label) = 1;
19513 gcc_assert (desired_alignment <= 8);
19516 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
19517 DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored. */
19519 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19520 int desired_align, int align_bytes)
19523 rtx dst_size = MEM_SIZE (dst);
19524 if (align_bytes & 1)
19526 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19528 emit_insn (gen_strset (destreg, dst,
19529 gen_lowpart (QImode, value)));
19531 if (align_bytes & 2)
19533 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19534 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19535 set_mem_align (dst, 2 * BITS_PER_UNIT);
19537 emit_insn (gen_strset (destreg, dst,
19538 gen_lowpart (HImode, value)));
19540 if (align_bytes & 4)
19542 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19543 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19544 set_mem_align (dst, 4 * BITS_PER_UNIT);
19546 emit_insn (gen_strset (destreg, dst,
19547 gen_lowpart (SImode, value)));
19549 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19550 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19551 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19553 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19557 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19558 static enum stringop_alg
19559 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19560 int *dynamic_check)
19562 const struct stringop_algs * algs;
19563 bool optimize_for_speed;
19564 /* Algorithms using the rep prefix want at least edi and ecx;
19565 additionally, memset wants eax and memcpy wants esi. Don't
19566 consider such algorithms if the user has appropriated those
19567 registers for their own purposes. */
19568 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19570 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19572 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19573 || (alg != rep_prefix_1_byte \
19574 && alg != rep_prefix_4_byte \
19575 && alg != rep_prefix_8_byte))
19576 const struct processor_costs *cost;
19578 /* Even if the string operation call is cold, we still might spend a lot
19579 of time processing large blocks. */
19580 if (optimize_function_for_size_p (cfun)
19581 || (optimize_insn_for_size_p ()
19582 && expected_size != -1 && expected_size < 256))
19583 optimize_for_speed = false;
19585 optimize_for_speed = true;
19587 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19589 *dynamic_check = -1;
19591 algs = &cost->memset[TARGET_64BIT != 0];
19593 algs = &cost->memcpy[TARGET_64BIT != 0];
19594 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19595 return stringop_alg;
19596 /* rep; movq or rep; movl is the smallest variant. */
19597 else if (!optimize_for_speed)
19599 if (!count || (count & 3))
19600 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19602 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19604 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
19606 else if (expected_size != -1 && expected_size < 4)
19607 return loop_1_byte;
19608 else if (expected_size != -1)
19611 enum stringop_alg alg = libcall;
19612 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19614 /* We get here if the algorithms that were not libcall-based
19615 were rep-prefix based and we are unable to use rep prefixes
19616 based on global register usage. Break out of the loop and
19617 use the heuristic below. */
19618 if (algs->size[i].max == 0)
19620 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19622 enum stringop_alg candidate = algs->size[i].alg;
19624 if (candidate != libcall && ALG_USABLE_P (candidate))
19626 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19627 last non-libcall inline algorithm. */
19628 if (TARGET_INLINE_ALL_STRINGOPS)
19630 /* When the current size is best copied by a libcall, but we
19631 are still forced to inline, run the heuristic below
19632 that will pick code for medium-sized blocks. */
19633 if (alg != libcall)
19637 else if (ALG_USABLE_P (candidate))
19641 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19643 /* When asked to inline the call anyway, try to pick a meaningful choice.
19644 We look for the maximal size of a block that is faster to copy by hand,
19645 and take blocks of at most that size, guessing that the average size
19646 will be roughly half of the block.
19648 If this turns out to be bad, we might simply specify the preferred
19649 choice in ix86_costs. */
19650 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19651 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19654 enum stringop_alg alg;
19656 bool any_alg_usable_p = true;
19658 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19660 enum stringop_alg candidate = algs->size[i].alg;
19661 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19663 if (candidate != libcall && candidate
19664 && ALG_USABLE_P (candidate))
19665 max = algs->size[i].max;
19667 /* If there aren't any usable algorithms, then recursing on
19668 smaller sizes isn't going to find anything. Just return the
19669 simple byte-at-a-time copy loop. */
19670 if (!any_alg_usable_p)
19672 /* Pick something reasonable. */
19673 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19674 *dynamic_check = 128;
19675 return loop_1_byte;
19679 alg = decide_alg (count, max / 2, memset, dynamic_check);
19680 gcc_assert (*dynamic_check == -1);
19681 gcc_assert (alg != libcall);
19682 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19683 *dynamic_check = max;
19686 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19687 #undef ALG_USABLE_P
19690 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19691 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19693 decide_alignment (int align,
19694 enum stringop_alg alg,
19697 int desired_align = 0;
19701 gcc_unreachable ();
19703 case unrolled_loop:
19704 desired_align = GET_MODE_SIZE (Pmode);
19706 case rep_prefix_8_byte:
19709 case rep_prefix_4_byte:
19710 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
19711 copying a whole cache line at once. */
19712 if (TARGET_PENTIUMPRO)
19717 case rep_prefix_1_byte:
19718 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
19719 copying a whole cache line at once. */
19720 if (TARGET_PENTIUMPRO)
19734 if (desired_align < align)
19735 desired_align = align;
19736 if (expected_size != -1 && expected_size < 4)
19737 desired_align = align;
19738 return desired_align;
19741 /* Return the smallest power of 2 greater than VAL. */
19743 smallest_pow2_greater_than (int val)
19751 /* Expand string move (memcpy) operation. Use i386 string operations when
19752 profitable. expand_setmem contains similar code. The code depends upon
19753 architecture, block size and alignment, but always has the same overall structure:
19756 1) Prologue guard: Conditional that jumps up to epilogues for small
19757 blocks that can be handled by epilogue alone. This is faster but
19758 also needed for correctness, since the prologue assumes the block is larger
19759 than the desired alignment.
19761 Optional dynamic check for size and libcall for large
19762 blocks is emitted here too, with -minline-stringops-dynamically.
19764 2) Prologue: copy first few bytes in order to get destination aligned
19765 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19766 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19767 We emit either a jump tree on power of two sized blocks, or a byte loop.
19769 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19770 with the specified algorithm.
19772 4) Epilogue: code copying tail of the block that is too small to be
19773 handled by main body (or up to size guarded by prologue guard). */
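/* Illustrative sketch, not part of the original file: the overall
   shape of the code emitted under the four steps above, as straight C,
   assuming SIZE_NEEDED is at least DESIRED_ALIGN so the guard also
   covers the prologue (names are hypothetical; SIZE_NEEDED and
   DESIRED_ALIGN stand for the values computed in step 0 below).  */

static void
movmem_shape_sketch (char *dst, const char *src, unsigned long n,
                     unsigned long size_needed, unsigned long desired_align)
{
  if (n >= size_needed)                             /* 1) prologue guard */
    {
      while ((unsigned long) dst % desired_align)   /* 2) prologue */
        { *dst++ = *src++; n--; }
      while (n >= size_needed)                      /* 3) main body */
        {
          __builtin_memcpy (dst, src, size_needed);
          dst += size_needed, src += size_needed, n -= size_needed;
        }
    }
  while (n--)                                       /* 4) epilogue */
    *dst++ = *src++;
}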
19776 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19777 rtx expected_align_exp, rtx expected_size_exp)
19783 rtx jump_around_label = NULL;
19784 HOST_WIDE_INT align = 1;
19785 unsigned HOST_WIDE_INT count = 0;
19786 HOST_WIDE_INT expected_size = -1;
19787 int size_needed = 0, epilogue_size_needed;
19788 int desired_align = 0, align_bytes = 0;
19789 enum stringop_alg alg;
19791 bool need_zero_guard = false;
19793 if (CONST_INT_P (align_exp))
19794 align = INTVAL (align_exp);
19795 /* i386 can do misaligned access at reasonably increased cost. */
19796 if (CONST_INT_P (expected_align_exp)
19797 && INTVAL (expected_align_exp) > align)
19798 align = INTVAL (expected_align_exp);
19799 /* ALIGN is the minimum of destination and source alignment, but we care here
19800 just about destination alignment. */
19801 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19802 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19804 if (CONST_INT_P (count_exp))
19805 count = expected_size = INTVAL (count_exp);
19806 if (CONST_INT_P (expected_size_exp) && count == 0)
19807 expected_size = INTVAL (expected_size_exp);
19809 /* Make sure we don't need to care about overflow later on. */
19810 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19813 /* Step 0: Decide on preferred algorithm, desired alignment and
19814 size of chunks to be copied by main loop. */
19816 alg = decide_alg (count, expected_size, false, &dynamic_check);
19817 desired_align = decide_alignment (align, alg, expected_size);
19819 if (!TARGET_ALIGN_STRINGOPS)
19820 align = desired_align;
19822 if (alg == libcall)
19824 gcc_assert (alg != no_stringop);
19826 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19827 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19828 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19833 gcc_unreachable ();
19835 need_zero_guard = true;
19836 size_needed = GET_MODE_SIZE (Pmode);
19838 case unrolled_loop:
19839 need_zero_guard = true;
19840 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19842 case rep_prefix_8_byte:
19845 case rep_prefix_4_byte:
19848 case rep_prefix_1_byte:
19852 need_zero_guard = true;
19857 epilogue_size_needed = size_needed;
19859 /* Step 1: Prologue guard. */
19861 /* Alignment code needs count to be in a register. */
19862 if (CONST_INT_P (count_exp) && desired_align > align)
19864 if (INTVAL (count_exp) > desired_align
19865 && INTVAL (count_exp) > size_needed)
19868 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19869 if (align_bytes <= 0)
19872 align_bytes = desired_align - align_bytes;
19874 if (align_bytes == 0)
19875 count_exp = force_reg (counter_mode (count_exp), count_exp);
19877 gcc_assert (desired_align >= 1 && align >= 1);
19879 /* Ensure that alignment prologue won't copy past end of block. */
19880 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19882 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19883 Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19884 Make sure it is a power of 2.  */
19885 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
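/* Worked example (editorial): with size_needed == 8, align == 4 and
   desired_align == 16, epilogue_size_needed becomes MAX (7, 12) == 12,
   rounded up to the power of two 16, so the epilogue handles
   COUNT_EXP & 15 bytes.  */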
19889 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19891 /* If the main algorithm works on QImode, no epilogue is needed.
19892 For small sizes just don't align anything. */
19893 if (size_needed == 1)
19894 desired_align = align;
19901 label = gen_label_rtx ();
19902 emit_cmp_and_jump_insns (count_exp,
19903 GEN_INT (epilogue_size_needed),
19904 LTU, 0, counter_mode (count_exp), 1, label);
19905 if (expected_size == -1 || expected_size < epilogue_size_needed)
19906 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19908 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19912 /* Emit code to decide at runtime whether a library call or inline code should be used. */
19914 if (dynamic_check != -1)
19916 if (CONST_INT_P (count_exp))
19918 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19920 emit_block_move_via_libcall (dst, src, count_exp, false);
19921 count_exp = const0_rtx;
19927 rtx hot_label = gen_label_rtx ();
19928 jump_around_label = gen_label_rtx ();
19929 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19930 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19931 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19932 emit_block_move_via_libcall (dst, src, count_exp, false);
19933 emit_jump (jump_around_label);
19934 emit_label (hot_label);
19938 /* Step 2: Alignment prologue. */
19940 if (desired_align > align)
19942 if (align_bytes == 0)
19944 /* Except for the first move in epilogue, we no longer know
19945 the constant offset in aliasing info. It doesn't seem worth
19946 the pain to maintain it for the first move, so throw away the info early. */
19948 src = change_address (src, BLKmode, srcreg);
19949 dst = change_address (dst, BLKmode, destreg);
19950 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19955 /* If we know how many bytes need to be stored before dst is
19956 sufficiently aligned, maintain aliasing info accurately. */
19957 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19958 desired_align, align_bytes);
19959 count_exp = plus_constant (count_exp, -align_bytes);
19960 count -= align_bytes;
19962 if (need_zero_guard
19963 && (count < (unsigned HOST_WIDE_INT) size_needed
19964 || (align_bytes == 0
19965 && count < ((unsigned HOST_WIDE_INT) size_needed
19966 + desired_align - align))))
19968 /* It is possible that we copied enough so the main loop will not execute. */
19970 gcc_assert (size_needed > 1);
19971 if (label == NULL_RTX)
19972 label = gen_label_rtx ();
19973 emit_cmp_and_jump_insns (count_exp,
19974 GEN_INT (size_needed),
19975 LTU, 0, counter_mode (count_exp), 1, label);
19976 if (expected_size == -1
19977 || expected_size < (desired_align - align) / 2 + size_needed)
19978 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19980 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19983 if (label && size_needed == 1)
19985 emit_label (label);
19986 LABEL_NUSES (label) = 1;
19988 epilogue_size_needed = 1;
19990 else if (label == NULL_RTX)
19991 epilogue_size_needed = size_needed;
19993 /* Step 3: Main loop. */
19999 gcc_unreachable ();
20001 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20002 count_exp, QImode, 1, expected_size);
20005 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20006 count_exp, Pmode, 1, expected_size);
20008 case unrolled_loop:
20009 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
20010 registers for 4 temporaries anyway. */
20011 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20012 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20015 case rep_prefix_8_byte:
20016 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20019 case rep_prefix_4_byte:
20020 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20023 case rep_prefix_1_byte:
20024 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20028 /* Properly adjust the offsets of the src and dest memory for aliasing. */
20029 if (CONST_INT_P (count_exp))
20031 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20032 (count / size_needed) * size_needed);
20033 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20034 (count / size_needed) * size_needed);
20038 src = change_address (src, BLKmode, srcreg);
20039 dst = change_address (dst, BLKmode, destreg);
20042 /* Step 4: Epilogue to copy the remaining bytes. */
20046 /* When the main loop is done, COUNT_EXP might hold the original count,
20047 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20048 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20049 bytes. Compensate if needed. */
20051 if (size_needed < epilogue_size_needed)
20054 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20055 GEN_INT (size_needed - 1), count_exp, 1,
20057 if (tmp != count_exp)
20058 emit_move_insn (count_exp, tmp);
20060 emit_label (label);
20061 LABEL_NUSES (label) = 1;
20064 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20065 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20066 epilogue_size_needed);
20067 if (jump_around_label)
20068 emit_label (jump_around_label);
20072 /* Helper function for memset (ix86_expand_setmem). For the QImode value 0xXY
20073 produce 0xXYXYXYXY of the width specified by MODE. This is essentially
20074 VAL * 0x01010101, but we can do slightly better than
20075 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
20078 promote_duplicated_reg (enum machine_mode mode, rtx val)
20080 enum machine_mode valmode = GET_MODE (val);
20082 int nops = mode == DImode ? 3 : 2;
20084 gcc_assert (mode == SImode || mode == DImode);
20085 if (val == const0_rtx)
20086 return copy_to_mode_reg (mode, const0_rtx);
20087 if (CONST_INT_P (val))
20089 HOST_WIDE_INT v = INTVAL (val) & 255;
20093 if (mode == DImode)
20094 v |= (v << 16) << 16;
20095 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20098 if (valmode == VOIDmode)
20100 if (valmode != QImode)
20101 val = gen_lowpart (QImode, val);
20102 if (mode == QImode)
20104 if (!TARGET_PARTIAL_REG_STALL)
20106 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20107 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20108 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20109 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20111 rtx reg = convert_modes (mode, QImode, val, true);
20112 tmp = promote_duplicated_reg (mode, const1_rtx);
20113 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20118 rtx reg = convert_modes (mode, QImode, val, true);
20120 if (!TARGET_PARTIAL_REG_STALL)
20121 if (mode == SImode)
20122 emit_insn (gen_movsi_insv_1 (reg, reg));
20124 emit_insn (gen_movdi_insv_1 (reg, reg));
20127 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20128 NULL, 1, OPTAB_DIRECT);
20130 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20132 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20133 NULL, 1, OPTAB_DIRECT);
20134 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20135 if (mode == SImode)
20137 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20138 NULL, 1, OPTAB_DIRECT);
20139 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
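/* Editorial illustration: for a constant byte the replication is plain
   arithmetic, e.g. for 0xAB in SImode

       v  = 0x000000AB;
       v |= v << 8;    | v == 0x0000ABAB
       v |= v << 16;   | v == 0xABABABAB

   which equals 0xAB * 0x01010101.  The non-constant path above builds the
   same value with insv/shift/IOR insns, or multiplies by the promoted
   constant 0x01...01 when the cost comparison favors the multiply.  */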
20144 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that
20145 will be needed by the main loop storing SIZE_NEEDED chunks and by the prologue
20146 raising the alignment from ALIGN to DESIRED_ALIGN. */
20148 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20153 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20154 promoted_val = promote_duplicated_reg (DImode, val);
20155 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20156 promoted_val = promote_duplicated_reg (SImode, val);
20157 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20158 promoted_val = promote_duplicated_reg (HImode, val);
20160 promoted_val = val;
20162 return promoted_val;
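/* Editorial example: on 64-bit targets, size_needed == 8 (or a prologue
   raising alignment above 4) promotes VAL to DImode; size_needed == 4
   promotes to SImode; with size_needed == 1 and no alignment prologue,
   VAL is left unpromoted.  */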
20165 /* Expand string set operation (memset; bzero is the VAL == 0 case). Use i386
20166 string operations when profitable. See the expand_movmem comment for an
20167 explanation of the individual steps performed. */
20169 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20170 rtx expected_align_exp, rtx expected_size_exp)
20175 rtx jump_around_label = NULL;
20176 HOST_WIDE_INT align = 1;
20177 unsigned HOST_WIDE_INT count = 0;
20178 HOST_WIDE_INT expected_size = -1;
20179 int size_needed = 0, epilogue_size_needed;
20180 int desired_align = 0, align_bytes = 0;
20181 enum stringop_alg alg;
20182 rtx promoted_val = NULL;
20183 bool force_loopy_epilogue = false;
20185 bool need_zero_guard = false;
20187 if (CONST_INT_P (align_exp))
20188 align = INTVAL (align_exp);
20189 /* i386 can do misaligned access at a reasonably increased cost. */
20190 if (CONST_INT_P (expected_align_exp)
20191 && INTVAL (expected_align_exp) > align)
20192 align = INTVAL (expected_align_exp);
20193 if (CONST_INT_P (count_exp))
20194 count = expected_size = INTVAL (count_exp);
20195 if (CONST_INT_P (expected_size_exp) && count == 0)
20196 expected_size = INTVAL (expected_size_exp);
20198 /* Make sure we don't need to care about overflow later on. */
20199 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20202 /* Step 0: Decide on preferred algorithm, desired alignment and
20203 size of chunks to be copied by main loop. */
20205 alg = decide_alg (count, expected_size, true, &dynamic_check);
20206 desired_align = decide_alignment (align, alg, expected_size);
20208 if (!TARGET_ALIGN_STRINGOPS)
20209 align = desired_align;
20211 if (alg == libcall)
20213 gcc_assert (alg != no_stringop);
20215 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20216 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20221 gcc_unreachable ();
20223 need_zero_guard = true;
20224 size_needed = GET_MODE_SIZE (Pmode);
20226 case unrolled_loop:
20227 need_zero_guard = true;
20228 size_needed = GET_MODE_SIZE (Pmode) * 4;
20230 case rep_prefix_8_byte:
20233 case rep_prefix_4_byte:
20236 case rep_prefix_1_byte:
20240 need_zero_guard = true;
20244 epilogue_size_needed = size_needed;
20246 /* Step 1: Prologue guard. */
20248 /* Alignment code needs count to be in a register. */
20249 if (CONST_INT_P (count_exp) && desired_align > align)
20251 if (INTVAL (count_exp) > desired_align
20252 && INTVAL (count_exp) > size_needed)
20255 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20256 if (align_bytes <= 0)
20259 align_bytes = desired_align - align_bytes;
20261 if (align_bytes == 0)
20263 enum machine_mode mode = SImode;
20264 if (TARGET_64BIT && (count & ~0xffffffff))
20266 count_exp = force_reg (mode, count_exp);
20269 /* Do the cheap promotion to allow better CSE across the
20270 main loop and epilogue (i.e. one load of the big constant in
20271 front of all the code). */
20272 if (CONST_INT_P (val_exp))
20273 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20274 desired_align, align);
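/* Editorial example: for memset (p, 0x41, n) in 64-bit mode this loads
   0x4141414141414141 into a register once, and both the main loop and the
   epilogue then reuse that register instead of rematerializing the
   constant.  */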
20275 /* Ensure that alignment prologue won't copy past end of block. */
20276 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20278 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20279 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20280 Make sure it is power of 2. */
20281 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20283 /* To improve performance of small blocks, we jump around the VAL
20284 promoting code. This means that if the promoted VAL is not constant,
20285 we might not use it in the epilogue and have to use a byte loop instead. */
20287 if (epilogue_size_needed > 2 && !promoted_val)
20288 force_loopy_epilogue = true;
20291 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20293 /* If the main algorithm works on QImode, no epilogue is needed.
20294 For small sizes just don't align anything. */
20295 if (size_needed == 1)
20296 desired_align = align;
20303 label = gen_label_rtx ();
20304 emit_cmp_and_jump_insns (count_exp,
20305 GEN_INT (epilogue_size_needed),
20306 LTU, 0, counter_mode (count_exp), 1, label);
20307 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20308 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20310 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20313 if (dynamic_check != -1)
20315 rtx hot_label = gen_label_rtx ();
20316 jump_around_label = gen_label_rtx ();
20317 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20318 LEU, 0, counter_mode (count_exp), 1, hot_label);
20319 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20320 set_storage_via_libcall (dst, count_exp, val_exp, false);
20321 emit_jump (jump_around_label);
20322 emit_label (hot_label);
20325 /* Step 2: Alignment prologue. */
20327 /* Do the expensive promotion once we have branched off the small blocks. */
20329 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20330 desired_align, align);
20331 gcc_assert (desired_align >= 1 && align >= 1);
20333 if (desired_align > align)
20335 if (align_bytes == 0)
20337 /* Except for the first move in epilogue, we no longer know
20338 the constant offset in aliasing info. It doesn't seem worth
20339 the pain to maintain it for the first move, so throw away the info early. */
20341 dst = change_address (dst, BLKmode, destreg);
20342 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20347 /* If we know how many bytes need to be stored before dst is
20348 sufficiently aligned, maintain aliasing info accurately. */
20349 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20350 desired_align, align_bytes);
20351 count_exp = plus_constant (count_exp, -align_bytes);
20352 count -= align_bytes;
20354 if (need_zero_guard
20355 && (count < (unsigned HOST_WIDE_INT) size_needed
20356 || (align_bytes == 0
20357 && count < ((unsigned HOST_WIDE_INT) size_needed
20358 + desired_align - align))))
20360 /* It is possible that we stored enough so the main loop will not execute. */
20362 gcc_assert (size_needed > 1);
20363 if (label == NULL_RTX)
20364 label = gen_label_rtx ();
20365 emit_cmp_and_jump_insns (count_exp,
20366 GEN_INT (size_needed),
20367 LTU, 0, counter_mode (count_exp), 1, label);
20368 if (expected_size == -1
20369 || expected_size < (desired_align - align) / 2 + size_needed)
20370 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20372 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20375 if (label && size_needed == 1)
20377 emit_label (label);
20378 LABEL_NUSES (label) = 1;
20380 promoted_val = val_exp;
20381 epilogue_size_needed = 1;
20383 else if (label == NULL_RTX)
20384 epilogue_size_needed = size_needed;
20386 /* Step 3: Main loop. */
20392 gcc_unreachable ();
20394 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20395 count_exp, QImode, 1, expected_size);
20398 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20399 count_exp, Pmode, 1, expected_size);
20401 case unrolled_loop:
20402 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20403 count_exp, Pmode, 4, expected_size);
20405 case rep_prefix_8_byte:
20406 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20409 case rep_prefix_4_byte:
20410 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20413 case rep_prefix_1_byte:
20414 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20418 /* Properly adjust the offset of the dest memory for aliasing. */
20419 if (CONST_INT_P (count_exp))
20420 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20421 (count / size_needed) * size_needed);
20423 dst = change_address (dst, BLKmode, destreg);
20425 /* Step 4: Epilogue to copy the remaining bytes. */
20429 /* When the main loop is done, COUNT_EXP might hold the original count,
20430 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20431 Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20432 bytes. Compensate if needed. */
20434 if (size_needed < epilogue_size_needed)
20437 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20438 GEN_INT (size_needed - 1), count_exp, 1,
20440 if (tmp != count_exp)
20441 emit_move_insn (count_exp, tmp);
20443 emit_label (label);
20444 LABEL_NUSES (label) = 1;
20447 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20449 if (force_loopy_epilogue)
20450 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20451 epilogue_size_needed);
20453 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20454 epilogue_size_needed);
20456 if (jump_around_label)
20457 emit_label (jump_around_label);
20461 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb
20464 out = result, initialized with the start address
20465 align_rtx = alignment of the address.
20466 scratch = scratch register, initialized with the start address when
20467 not aligned, otherwise undefined
20469 This is just the body. It needs the initializations mentioned above and
20470 some address computing at the end. These things are done in i386.md. */
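/* Editorial sketch (not part of the compiler) of the code generated below,
   assuming SImode scanning:

       while (((uintptr_t) out & 3) != 0)   | check 1..3 bytes to align
         if (*out++ == 0) goto done;
       for (;;)                             | then 4 bytes at a time
         {
           unsigned int x = *(unsigned int *) out;
           out += 4;
           if (((x - 0x01010101U) & ~x & 0x80808080U) != 0)
             break;                         | some byte of x is zero
         }
       ...locate the zero byte within x and adjust OUT...
     done:  */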
20473 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20477 rtx align_2_label = NULL_RTX;
20478 rtx align_3_label = NULL_RTX;
20479 rtx align_4_label = gen_label_rtx ();
20480 rtx end_0_label = gen_label_rtx ();
20482 rtx tmpreg = gen_reg_rtx (SImode);
20483 rtx scratch = gen_reg_rtx (SImode);
20487 if (CONST_INT_P (align_rtx))
20488 align = INTVAL (align_rtx);
20490 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20492 /* Is there a known alignment and is it less than 4? */
20495 rtx scratch1 = gen_reg_rtx (Pmode);
20496 emit_move_insn (scratch1, out);
20497 /* Is there a known alignment and is it not 2? */
20500 align_3_label = gen_label_rtx (); /* Label when address is 3 mod 4. */
20501 align_2_label = gen_label_rtx (); /* Label when address is 2 mod 4. */
20503 /* Leave just the two lower bits. */
20504 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20505 NULL_RTX, 0, OPTAB_WIDEN);
20507 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20508 Pmode, 1, align_4_label);
20509 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20510 Pmode, 1, align_2_label);
20511 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20512 Pmode, 1, align_3_label);
20516 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20517 check whether the address is 4-byte aligned. */
20519 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20520 NULL_RTX, 0, OPTAB_WIDEN);
20522 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20523 Pmode, 1, align_4_label);
20526 mem = change_address (src, QImode, out);
20528 /* Now compare the bytes. */
20530 /* Compare the leading unaligned bytes on a byte-by-byte basis. */
20531 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20532 QImode, 1, end_0_label);
20534 /* Increment the address. */
20535 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20537 /* Not needed with an alignment of 2 */
20540 emit_label (align_2_label);
20542 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20545 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20547 emit_label (align_3_label);
20550 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20553 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20556 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
20557 align this loop; that only makes programs bigger and does not help performance. */
20559 emit_label (align_4_label);
20561 mem = change_address (src, SImode, out);
20562 emit_move_insn (scratch, mem);
20563 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20565 /* This formula yields a nonzero result iff one of the bytes is zero.
20566 This saves three branches inside the loop and many cycles. */
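/* Worked example (editorial) of (x - 0x01010101) & ~x & 0x80808080:

     x = 0x41410041:  x - 0x01010101 = 0x403FFF40
                      & ~x           = 0x003EFF00    (~x == 0xBEBEFFBE)
                      & 0x80808080   = 0x00008000    -> a byte of x is zero

     x = 0x41424344:  x - 0x01010101 = 0x40414243
                      & ~x           = 0x00010003    (~x == 0xBEBDBCBB)
                      & 0x80808080   = 0x00000000    -> no zero byte

   The subtraction borrows out of a zero byte and sets its 0x80 bit, while
   ANDing with ~x rejects bytes whose own top bit was already set.  */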
20568 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20569 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20570 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20571 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20572 gen_int_mode (0x80808080, SImode)));
20573 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20578 rtx reg = gen_reg_rtx (SImode);
20579 rtx reg2 = gen_reg_rtx (Pmode);
20580 emit_move_insn (reg, tmpreg);
20581 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20583 /* If zero is not in the first two bytes, move two bytes forward. */
20584 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20585 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20586 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20587 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20588 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20591 /* Emit the lea manually to avoid clobbering the flags. */
20592 emit_insn (gen_rtx_SET (SImode, reg2,
20593 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20595 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20596 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20597 emit_insn (gen_rtx_SET (VOIDmode, out,
20598 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20604 rtx end_2_label = gen_label_rtx ();
20605 /* Is zero in the first two bytes? */
20607 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20608 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20609 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20610 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20611 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20613 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20614 JUMP_LABEL (tmp) = end_2_label;
20616 /* Not in the first two. Move two bytes forward. */
20617 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20618 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20620 emit_label (end_2_label);
20624 /* Avoid a branch in fixing the byte. */
20625 tmpreg = gen_lowpart (QImode, tmpreg);
20626 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20627 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20628 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20629 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20631 emit_label (end_0_label);
20634 /* Expand strlen. */
20637 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20639 rtx addr, scratch1, scratch2, scratch3, scratch4;
20641 /* The generic case of the strlen expander is long. Avoid
20642 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
20644 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20645 && !TARGET_INLINE_ALL_STRINGOPS
20646 && !optimize_insn_for_size_p ()
20647 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20650 addr = force_reg (Pmode, XEXP (src, 0));
20651 scratch1 = gen_reg_rtx (Pmode);
20653 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20654 && !optimize_insn_for_size_p ())
20656 /* It seems that some optimizers fail to combine a call like
20657 foo(strlen(bar), strlen(bar));
20658 when the move and the subtraction are done here, whereas the length
20659 is calculated just once when these instructions are emitted inside
20660 output_strlen_unroll(). But since &bar[strlen(bar)] is a common
20661 idiom and this variant uses one fewer register for the lifetime of
20662 output_strlen_unroll(), it is better overall. */
20664 emit_move_insn (out, addr);
20666 ix86_expand_strlensi_unroll_1 (out, src, align);
20668 /* strlensi_unroll_1 returns the address of the zero at the end of
20669 the string, like memchr(), so compute the length by subtracting
20670 the start address. */
20671 emit_insn (ix86_gen_sub3 (out, out, addr));
20677 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20678 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20681 scratch2 = gen_reg_rtx (Pmode);
20682 scratch3 = gen_reg_rtx (Pmode);
20683 scratch4 = force_reg (Pmode, constm1_rtx);
20685 emit_move_insn (scratch3, addr);
20686 eoschar = force_reg (QImode, eoschar);
20688 src = replace_equiv_address_nv (src, scratch3);
20690 /* If .md starts supporting :P, this can be done in .md. */
20691 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20692 scratch4), UNSPEC_SCAS);
20693 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20694 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20695 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
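/* Editorial note on the computation above: the count register starts at
   -1 (scratch4); repnz scasb examines length + 1 bytes, including the
   terminating zero, leaving the count at -(length + 2).  Hence
   out = ~count + (-1) = (length + 1) - 1 = length.  */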
20700 /* For a given symbol (function), construct code to compute the address of its
20701 PLT entry in the large x86-64 PIC model. */
20703 construct_plt_address (rtx symbol)
20705 rtx tmp = gen_reg_rtx (Pmode);
20706 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20708 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20709 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20711 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20712 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20717 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20719 rtx pop, int sibcall)
20721 rtx use = NULL, call;
20723 if (pop == const0_rtx)
20725 gcc_assert (!TARGET_64BIT || !pop);
20727 if (TARGET_MACHO && !TARGET_64BIT)
20730 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20731 fnaddr = machopic_indirect_call_target (fnaddr);
20736 /* Static functions and indirect calls don't need the pic register. */
20737 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20738 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20739 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20740 use_reg (&use, pic_offset_table_rtx);
20743 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20745 rtx al = gen_rtx_REG (QImode, AX_REG);
20746 emit_move_insn (al, callarg2);
20747 use_reg (&use, al);
20750 if (ix86_cmodel == CM_LARGE_PIC
20752 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20753 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20754 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20756 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20757 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20759 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20760 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20763 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20765 call = gen_rtx_SET (VOIDmode, retval, call);
20768 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20769 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20770 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20773 && ix86_cfun_abi () == MS_ABI
20774 && (!callarg2 || INTVAL (callarg2) != -2))
20776 /* We need to represent that SI and DI registers are clobbered by the call. */
20778 static int clobbered_registers[] = {
20779 XMM6_REG, XMM7_REG, XMM8_REG,
20780 XMM9_REG, XMM10_REG, XMM11_REG,
20781 XMM12_REG, XMM13_REG, XMM14_REG,
20782 XMM15_REG, SI_REG, DI_REG
20785 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20786 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20787 UNSPEC_MS_TO_SYSV_CALL);
20791 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20792 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20795 (SSE_REGNO_P (clobbered_registers[i])
20797 clobbered_registers[i]));
20799 call = gen_rtx_PARALLEL (VOIDmode,
20800 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20804 call = emit_call_insn (call);
20806 CALL_INSN_FUNCTION_USAGE (call) = use;
20812 /* Clear stack slot assignments remembered from previous functions.
20813 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
20816 static struct machine_function *
20817 ix86_init_machine_status (void)
20819 struct machine_function *f;
20821 f = ggc_alloc_cleared_machine_function ();
20822 f->use_fast_prologue_epilogue_nregs = -1;
20823 f->tls_descriptor_call_expanded_p = 0;
20824 f->call_abi = ix86_abi;
20829 /* Return a MEM corresponding to a stack slot with mode MODE.
20830 Allocate a new slot if necessary.
20832 The RTL for a function can have several slots available: N is
20833 which slot to use. */
20836 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20838 struct stack_local_entry *s;
20840 gcc_assert (n < MAX_386_STACK_LOCALS);
20842 /* Virtual slot is valid only before vregs are instantiated. */
20843 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20845 for (s = ix86_stack_locals; s; s = s->next)
20846 if (s->mode == mode && s->n == n)
20847 return copy_rtx (s->rtl);
20849 s = ggc_alloc_stack_local_entry ();
20852 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20854 s->next = ix86_stack_locals;
20855 ix86_stack_locals = s;
20859 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20861 static GTY(()) rtx ix86_tls_symbol;
20863 ix86_tls_get_addr (void)
20866 if (!ix86_tls_symbol)
20868 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20869 (TARGET_ANY_GNU_TLS
20871 ? "___tls_get_addr"
20872 : "__tls_get_addr");
20875 return ix86_tls_symbol;
20878 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20880 static GTY(()) rtx ix86_tls_module_base_symbol;
20882 ix86_tls_module_base (void)
20885 if (!ix86_tls_module_base_symbol)
20887 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20888 "_TLS_MODULE_BASE_");
20889 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20890 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20893 return ix86_tls_module_base_symbol;
20896 /* Calculate the length of the memory address in the instruction
20897 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20900 memory_address_length (rtx addr)
20902 struct ix86_address parts;
20903 rtx base, index, disp;
20907 if (GET_CODE (addr) == PRE_DEC
20908 || GET_CODE (addr) == POST_INC
20909 || GET_CODE (addr) == PRE_MODIFY
20910 || GET_CODE (addr) == POST_MODIFY)
20913 ok = ix86_decompose_address (addr, &parts);
20916 if (parts.base && GET_CODE (parts.base) == SUBREG)
20917 parts.base = SUBREG_REG (parts.base);
20918 if (parts.index && GET_CODE (parts.index) == SUBREG)
20919 parts.index = SUBREG_REG (parts.index);
20922 index = parts.index;
20927 - esp as the base always wants an index,
20928 - ebp as the base always wants a displacement,
20929 - r12 as the base always wants an index,
20930 - r13 as the base always wants a displacement. */
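/* Editorial examples of the extra bytes counted by this function (the
   modrm byte, opcode and prefixes are excluded):

     (%eax)             0  plain register indirect
     (%esp)             1  SIB byte required
     4(%ebp)            1  disp8 required
     4(%esp)            2  SIB byte + disp8
     0x12345678(%eax)   4  disp32  */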
20932 /* Register Indirect. */
20933 if (base && !index && !disp)
20935 /* esp (for its index) and ebp (for its displacement) need
20936 the two-byte modrm form. Similarly for r12 and r13 in 64-bit mode. */
20939 && (addr == arg_pointer_rtx
20940 || addr == frame_pointer_rtx
20941 || REGNO (addr) == SP_REG
20942 || REGNO (addr) == BP_REG
20943 || REGNO (addr) == R12_REG
20944 || REGNO (addr) == R13_REG))
20948 /* Direct addressing. In 64-bit mode, mod 00 r/m 5
20949 is not disp32 but disp32(%rip), so for disp32 a
20950 SIB byte is needed, unless print_operand_address
20951 optimizes it into disp32(%rip) or (%rip) is implied by an UNSPEC. */
20953 else if (disp && !base && !index)
20960 if (GET_CODE (disp) == CONST)
20961 symbol = XEXP (disp, 0);
20962 if (GET_CODE (symbol) == PLUS
20963 && CONST_INT_P (XEXP (symbol, 1)))
20964 symbol = XEXP (symbol, 0);
20966 if (GET_CODE (symbol) != LABEL_REF
20967 && (GET_CODE (symbol) != SYMBOL_REF
20968 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20969 && (GET_CODE (symbol) != UNSPEC
20970 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20971 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20978 /* Find the length of the displacement constant. */
20981 if (base && satisfies_constraint_K (disp))
20986 /* ebp always wants a displacement. Similarly r13. */
20987 else if (base && REG_P (base)
20988 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20991 /* An index requires the two-byte modrm form.... */
20993 /* ...like esp (or r12), which always wants an index. */
20994 || base == arg_pointer_rtx
20995 || base == frame_pointer_rtx
20996 || (base && REG_P (base)
20997 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21014 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
21015 is set, expect that the insn has an 8-bit immediate alternative. */
21017 ix86_attr_length_immediate_default (rtx insn, int shortform)
21021 extract_insn_cached (insn);
21022 for (i = recog_data.n_operands - 1; i >= 0; --i)
21023 if (CONSTANT_P (recog_data.operand[i]))
21025 enum attr_mode mode = get_attr_mode (insn);
21028 if (shortform && CONST_INT_P (recog_data.operand[i]))
21030 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21037 ival = trunc_int_for_mode (ival, HImode);
21040 ival = trunc_int_for_mode (ival, SImode);
21045 if (IN_RANGE (ival, -128, 127))
21062 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
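/* Editorial example: with SHORTFORM, "addl $3, %eax" is encoded with
   opcode 0x83 and an imm8, since 3 fits in -128..127, while
   "addl $300, %eax" needs opcode 0x81 and a 4-byte imm32; DImode forms
   carry at most that same sign-extended imm32.  */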
21067 fatal_insn ("unknown insn mode", insn);
21072 /* Compute default value for "length_address" attribute. */
21074 ix86_attr_length_address_default (rtx insn)
21078 if (get_attr_type (insn) == TYPE_LEA)
21080 rtx set = PATTERN (insn), addr;
21082 if (GET_CODE (set) == PARALLEL)
21083 set = XVECEXP (set, 0, 0);
21085 gcc_assert (GET_CODE (set) == SET);
21087 addr = SET_SRC (set);
21088 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21090 if (GET_CODE (addr) == ZERO_EXTEND)
21091 addr = XEXP (addr, 0);
21092 if (GET_CODE (addr) == SUBREG)
21093 addr = SUBREG_REG (addr);
21096 return memory_address_length (addr);
21099 extract_insn_cached (insn);
21100 for (i = recog_data.n_operands - 1; i >= 0; --i)
21101 if (MEM_P (recog_data.operand[i]))
21103 constrain_operands_cached (reload_completed);
21104 if (which_alternative != -1)
21106 const char *constraints = recog_data.constraints[i];
21107 int alt = which_alternative;
21109 while (*constraints == '=' || *constraints == '+')
21112 while (*constraints++ != ',')
21114 /* Skip ignored operands. */
21115 if (*constraints == 'X')
21118 return memory_address_length (XEXP (recog_data.operand[i], 0));
21123 /* Compute the default value for the "length_vex" attribute. It includes
21124 the 2- or 3-byte VEX prefix and 1 opcode byte. */
21127 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21132 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
21133 the 3-byte VEX prefix. */
21134 if (!has_0f_opcode || has_vex_w)
21137 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21141 extract_insn_cached (insn);
21143 for (i = recog_data.n_operands - 1; i >= 0; --i)
21144 if (REG_P (recog_data.operand[i]))
21146 /* The REX.W bit requires the 3-byte VEX prefix. */
21147 if (GET_MODE (recog_data.operand[i]) == DImode
21148 && GENERAL_REG_P (recog_data.operand[i]))
21153 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
21154 if (MEM_P (recog_data.operand[i])
21155 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
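/* Editorial example: "vaddps %xmm2, %xmm1, %xmm0" fits the 2-byte (0xC5)
   VEX prefix, 3 bytes in total with the opcode; a DImode general-register
   operand (VEX.W) or an extended register used as base or index
   (REX.X/REX.B) forces the 3-byte (0xC4) form, 4 bytes in total.  */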
21162 /* Return the maximum number of instructions a CPU can issue. */
21165 ix86_issue_rate (void)
21169 case PROCESSOR_PENTIUM:
21170 case PROCESSOR_ATOM:
21174 case PROCESSOR_PENTIUMPRO:
21175 case PROCESSOR_PENTIUM4:
21176 case PROCESSOR_ATHLON:
21178 case PROCESSOR_AMDFAM10:
21179 case PROCESSOR_NOCONA:
21180 case PROCESSOR_GENERIC32:
21181 case PROCESSOR_GENERIC64:
21182 case PROCESSOR_BDVER1:
21185 case PROCESSOR_CORE2:
21193 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21194 by DEP_INSN and nothing else set by DEP_INSN. */
21197 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21201 /* Simplify the test for uninteresting insns. */
21202 if (insn_type != TYPE_SETCC
21203 && insn_type != TYPE_ICMOV
21204 && insn_type != TYPE_FCMOV
21205 && insn_type != TYPE_IBR)
21208 if ((set = single_set (dep_insn)) != 0)
21210 set = SET_DEST (set);
21213 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21214 && XVECLEN (PATTERN (dep_insn), 0) == 2
21215 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21216 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21218 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21219 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21224 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21227 /* This test is true if the dependent insn reads the flags but
21228 not any other potentially set register. */
21229 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21232 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21238 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
21242 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21245 extract_insn_cached (use_insn);
21246 for (i = recog_data.n_operands - 1; i >= 0; --i)
21247 if (MEM_P (recog_data.operand[i]))
21249 rtx addr = XEXP (recog_data.operand[i], 0);
21250 return modified_in_p (addr, set_insn) != 0;
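/* Editorial example of a dependence detected by ix86_agi_dependent:

       addl $4, %ebx
       movl (%ebx), %eax

   The load's address uses the %ebx value produced by the immediately
   preceding insn, which stalls the address-generation stage on Pentium.  */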
21256 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21258 enum attr_type insn_type, dep_insn_type;
21259 enum attr_memory memory;
21261 int dep_insn_code_number;
21263 /* Anti and output dependencies have zero cost on all CPUs. */
21264 if (REG_NOTE_KIND (link) != 0)
21267 dep_insn_code_number = recog_memoized (dep_insn);
21269 /* If we can't recognize the insns, we can't really do anything. */
21270 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21273 insn_type = get_attr_type (insn);
21274 dep_insn_type = get_attr_type (dep_insn);
21278 case PROCESSOR_PENTIUM:
21279 /* Address Generation Interlock adds a cycle of latency. */
21280 if (insn_type == TYPE_LEA)
21282 rtx addr = PATTERN (insn);
21284 if (GET_CODE (addr) == PARALLEL)
21285 addr = XVECEXP (addr, 0, 0);
21287 gcc_assert (GET_CODE (addr) == SET);
21289 addr = SET_SRC (addr);
21290 if (modified_in_p (addr, dep_insn))
21293 else if (ix86_agi_dependent (dep_insn, insn))
21296 /* ??? Compares pair with jump/setcc. */
21297 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21300 /* Floating point stores require the value to be ready one cycle earlier. */
21301 if (insn_type == TYPE_FMOV
21302 && get_attr_memory (insn) == MEMORY_STORE
21303 && !ix86_agi_dependent (dep_insn, insn))
21307 case PROCESSOR_PENTIUMPRO:
21308 memory = get_attr_memory (insn);
21310 /* INT->FP conversion is expensive. */
21311 if (get_attr_fp_int_src (dep_insn))
21314 /* There is one extra cycle of latency between an FP op and a store. */
21315 if (insn_type == TYPE_FMOV
21316 && (set = single_set (dep_insn)) != NULL_RTX
21317 && (set2 = single_set (insn)) != NULL_RTX
21318 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21319 && MEM_P (SET_DEST (set2)))
21322 /* Show the ability of the reorder buffer to hide the latency of a load by
21323 executing it in parallel with the previous instruction when the
21324 previous instruction is not needed to compute the address. */
21325 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21326 && !ix86_agi_dependent (dep_insn, insn))
21328 /* Claim moves to take one cycle, as the core can issue one load
21329 at a time and the next load can start a cycle later. */
21330 if (dep_insn_type == TYPE_IMOV
21331 || dep_insn_type == TYPE_FMOV)
21339 memory = get_attr_memory (insn);
21341 /* The esp dependency is resolved before the instruction is really finished. */
21343 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21344 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21347 /* INT->FP conversion is expensive. */
21348 if (get_attr_fp_int_src (dep_insn))
21351 /* Show the ability of the reorder buffer to hide the latency of a load by
21352 executing it in parallel with the previous instruction when the
21353 previous instruction is not needed to compute the address. */
21354 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21355 && !ix86_agi_dependent (dep_insn, insn))
21357 /* Claim moves to take one cycle, as the core can issue one load
21358 at a time and the next load can start a cycle later. */
21359 if (dep_insn_type == TYPE_IMOV
21360 || dep_insn_type == TYPE_FMOV)
21369 case PROCESSOR_ATHLON:
21371 case PROCESSOR_AMDFAM10:
21372 case PROCESSOR_BDVER1:
21373 case PROCESSOR_ATOM:
21374 case PROCESSOR_GENERIC32:
21375 case PROCESSOR_GENERIC64:
21376 memory = get_attr_memory (insn);
21378 /* Show the ability of the reorder buffer to hide the latency of a load by
21379 executing it in parallel with the previous instruction when the
21380 previous instruction is not needed to compute the address. */
21381 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21382 && !ix86_agi_dependent (dep_insn, insn))
21384 enum attr_unit unit = get_attr_unit (insn);
21387 /* Because of the difference between the length of integer and
21388 floating unit pipeline preparation stages, the memory operands
21389 for floating point are cheaper.
21391 ??? For Athlon the difference is most probably 2. */
21392 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21395 loadcost = TARGET_ATHLON ? 2 : 0;
21397 if (cost >= loadcost)
21410 /* How many alternative schedules to try. This should be as wide as the
21411 scheduling freedom in the DFA, but no wider. Making this value too
21412 large results in extra work for the scheduler. */
21415 ia32_multipass_dfa_lookahead (void)
21419 case PROCESSOR_PENTIUM:
21422 case PROCESSOR_PENTIUMPRO:
21432 /* Compute the alignment given to a constant that is being placed in memory.
21433 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
21435 The value of this function is used instead of that alignment to align the object. */
21439 ix86_constant_alignment (tree exp, int align)
21441 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21442 || TREE_CODE (exp) == INTEGER_CST)
21444 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21446 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21449 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21450 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21451 return BITS_PER_WORD;
21456 /* Compute the alignment for a static variable.
21457 TYPE is the data type, and ALIGN is the alignment that
21458 the object would ordinarily have. The value of this function is used
21459 instead of that alignment to align the object. */
21462 ix86_data_alignment (tree type, int align)
21464 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21466 if (AGGREGATE_TYPE_P (type)
21467 && TYPE_SIZE (type)
21468 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21469 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21470 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21471 && align < max_align)
21474 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
21475 to a 16-byte boundary. */
21478 if (AGGREGATE_TYPE_P (type)
21479 && TYPE_SIZE (type)
21480 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21481 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21482 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21486 if (TREE_CODE (type) == ARRAY_TYPE)
21488 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21490 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21493 else if (TREE_CODE (type) == COMPLEX_TYPE)
21496 if (TYPE_MODE (type) == DCmode && align < 64)
21498 if ((TYPE_MODE (type) == XCmode
21499 || TYPE_MODE (type) == TCmode) && align < 128)
21502 else if ((TREE_CODE (type) == RECORD_TYPE
21503 || TREE_CODE (type) == UNION_TYPE
21504 || TREE_CODE (type) == QUAL_UNION_TYPE)
21505 && TYPE_FIELDS (type))
21507 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21509 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21512 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21513 || TREE_CODE (type) == INTEGER_TYPE)
21515 if (TYPE_MODE (type) == DFmode && align < 64)
21517 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21524 /* Compute the alignment for a local variable or a stack slot. EXP is
21525 the data type or decl itself, MODE is the widest mode available and
21526 ALIGN is the alignment that the object would ordinarily have. The
21527 value of this macro is used instead of that alignment to align the object. */
21531 ix86_local_alignment (tree exp, enum machine_mode mode,
21532 unsigned int align)
21536 if (exp && DECL_P (exp))
21538 type = TREE_TYPE (exp);
21547 /* Don't do dynamic stack realignment for long long objects with
21548 -mpreferred-stack-boundary=2. */
21551 && ix86_preferred_stack_boundary < 64
21552 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21553 && (!type || !TYPE_USER_ALIGN (type))
21554 && (!decl || !DECL_USER_ALIGN (decl)))
21557 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21558 register in MODE. We will return the largest alignment of XF and DF. */
21562 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21563 align = GET_MODE_ALIGNMENT (DFmode);
21567 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
21568 to a 16-byte boundary. The exact wording is:
21570 An array uses the same alignment as its elements, except that a local or
21571 global array variable of length at least 16 bytes or
21572 a C99 variable-length array variable always has alignment of at least 16 bytes.
21574 This was added to allow use of aligned SSE instructions on arrays. The
21575 rule is meant for static storage (where the compiler cannot do the analysis
21576 by itself). We follow it for automatic variables only when convenient.
21577 We fully control everything in the function being compiled, and functions from
21578 other units cannot rely on the alignment.
21580 Exclude the va_list type. It is the common case of a local array where
21581 we cannot benefit from the alignment. */
21582 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21585 if (AGGREGATE_TYPE_P (type)
21586 && (TYPE_MAIN_VARIANT (type)
21587 != TYPE_MAIN_VARIANT (va_list_type_node))
21588 && TYPE_SIZE (type)
21589 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21590 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21591 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21594 if (TREE_CODE (type) == ARRAY_TYPE)
21596 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21598 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21601 else if (TREE_CODE (type) == COMPLEX_TYPE)
21603 if (TYPE_MODE (type) == DCmode && align < 64)
21605 if ((TYPE_MODE (type) == XCmode
21606 || TYPE_MODE (type) == TCmode) && align < 128)
21609 else if ((TREE_CODE (type) == RECORD_TYPE
21610 || TREE_CODE (type) == UNION_TYPE
21611 || TREE_CODE (type) == QUAL_UNION_TYPE)
21612 && TYPE_FIELDS (type))
21614 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21616 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21619 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21620 || TREE_CODE (type) == INTEGER_TYPE)
21623 if (TYPE_MODE (type) == DFmode && align < 64)
21625 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21631 /* Compute the minimum required alignment for dynamic stack realignment
21632 purposes for a local variable, parameter or a stack slot. EXP is
21633 the data type or decl itself, MODE is its mode and ALIGN is the
21634 alignment that the object would ordinarily have. */
21637 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21638 unsigned int align)
21642 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21645 if (exp && DECL_P (exp))
21647 type = TREE_TYPE (exp);
21656 /* Don't do dynamic stack realignment for long long objects with
21657 -mpreferred-stack-boundary=2. */
21658 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21659 && (!type || !TYPE_USER_ALIGN (type))
21660 && (!decl || !DECL_USER_ALIGN (decl)))
21666 /* Find a location for the static chain incoming to a nested function.
21667 This is a register, unless all free registers are used by arguments. */
21670 ix86_static_chain (const_tree fndecl, bool incoming_p)
21674 if (!DECL_STATIC_CHAIN (fndecl))
21679 /* We always use R10 in 64-bit mode. */
21685 /* By default in 32-bit mode we use ECX to pass the static chain. */
21688 fntype = TREE_TYPE (fndecl);
21689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21691 /* Fastcall functions use ecx/edx for arguments, which leaves
21692 us with EAX for the static chain. */
21695 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21697 /* Thiscall functions use ecx for arguments, which leaves
21698 us with EAX for the static chain. */
21701 else if (ix86_function_regparm (fntype, fndecl) == 3)
21703 /* For regparm 3, we have no free call-clobbered registers in
21704 which to store the static chain. In order to implement this,
21705 we have the trampoline push the static chain to the stack.
21706 However, we can't push a value below the return address when
21707 we call the nested function directly, so we have to use an
21708 alternate entry point. For this we use ESI, and have the
21709 alternate entry point push ESI, so that things appear the
21710 same once we're executing the nested function. */
21713 if (fndecl == current_function_decl)
21714 ix86_static_chain_on_stack = true;
21715 return gen_frame_mem (SImode,
21716 plus_constant (arg_pointer_rtx, -8));
21722 return gen_rtx_REG (Pmode, regno);
21725 /* Emit RTL insns to initialize the variable parts of a trampoline.
21726 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21727 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21728 to be passed to the target function. */
21731 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21735 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21742 /* Depending on the static chain location, either load a register
21743 with a constant, or push the constant to the stack. All of the
21744 instructions are the same size. */
21745 chain = ix86_static_chain (fndecl, true);
21748 if (REGNO (chain) == CX_REG)
21750 else if (REGNO (chain) == AX_REG)
21753 gcc_unreachable ();
21758 mem = adjust_address (m_tramp, QImode, 0);
21759 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21761 mem = adjust_address (m_tramp, SImode, 1);
21762 emit_move_insn (mem, chain_value);
21764 /* Compute offset from the end of the jmp to the target function.
21765 In the case in which the trampoline stores the static chain on
21766 the stack, we need to skip the first insn which pushes the
21767 (call-saved) register static chain; this push is 1 byte. */
21768 disp = expand_binop (SImode, sub_optab, fnaddr,
21769 plus_constant (XEXP (m_tramp, 0),
21770 MEM_P (chain) ? 9 : 10),
21771 NULL_RTX, 1, OPTAB_DIRECT);
21773 mem = adjust_address (m_tramp, QImode, 5);
21774 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21776 mem = adjust_address (m_tramp, SImode, 6);
21777 emit_move_insn (mem, disp);
21783 /* Load the function address into r11. Try to load the address using
21784 the shorter movl instead of movabs. We may want to support
21785 movq for kernel mode, but the kernel does not use trampolines at the moment. */
21787 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21789 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21791 mem = adjust_address (m_tramp, HImode, offset);
21792 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21794 mem = adjust_address (m_tramp, SImode, offset + 2);
21795 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21800 mem = adjust_address (m_tramp, HImode, offset);
21801 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21803 mem = adjust_address (m_tramp, DImode, offset + 2);
21804 emit_move_insn (mem, fnaddr);
21808 /* Load static chain using movabs to r10. */
21809 mem = adjust_address (m_tramp, HImode, offset);
21810 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21812 mem = adjust_address (m_tramp, DImode, offset + 2);
21813 emit_move_insn (mem, chain_value);
21816 /* Jump to r11; the last (unused) byte is a nop, only there to
21817 pad the write out to a single 32-bit store. */
21818 mem = adjust_address (m_tramp, SImode, offset);
21819 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
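/* Editorial summary of the 64-bit trampoline emitted above, in memory
   order (assuming the movabs form was chosen for the function address):

     49 BB <8-byte fnaddr>   movabs $fnaddr, %r11
     49 BA <8-byte chain>    movabs $chain, %r10
     49 FF E3 90             rex.B jmp *%r11; nop (pads the final write
                             to a full 32-bit store)  */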
21822 gcc_assert (offset <= TRAMPOLINE_SIZE);
21825 #ifdef ENABLE_EXECUTE_STACK
21826 #ifdef CHECK_EXECUTE_STACK_ENABLED
21827 if (CHECK_EXECUTE_STACK_ENABLED)
21829 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21830 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21834 /* The following file contains several enumerations and data structures
21835 built from the definitions in i386-builtin-types.def. */
21837 #include "i386-builtin-types.inc"
21839 /* Table for the ix86 builtin non-function types. */
21840 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21842 /* Retrieve an element from the above table, building some of
21843 the types lazily. */
21846 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21848 unsigned int index;
21851 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21853 type = ix86_builtin_type_tab[(int) tcode];
21857 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21858 if (tcode <= IX86_BT_LAST_VECT)
21860 enum machine_mode mode;
21862 index = tcode - IX86_BT_LAST_PRIM - 1;
21863 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21864 mode = ix86_builtin_type_vect_mode[index];
21866 type = build_vector_type_for_mode (itype, mode);
21872 index = tcode - IX86_BT_LAST_VECT - 1;
21873 if (tcode <= IX86_BT_LAST_PTR)
21874 quals = TYPE_UNQUALIFIED;
21876 quals = TYPE_QUAL_CONST;
21878 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21879 if (quals != TYPE_UNQUALIFIED)
21880 itype = build_qualified_type (itype, quals);
21882 type = build_pointer_type (itype);
21885 ix86_builtin_type_tab[(int) tcode] = type;
21889 /* Table for the ix86 builtin function types. */
21890 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21892 /* Retrieve an element from the above table, building some of
21893 the types lazily. */
21896 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21900 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21902 type = ix86_builtin_func_type_tab[(int) tcode];
21906 if (tcode <= IX86_BT_LAST_FUNC)
21908 unsigned start = ix86_builtin_func_start[(int) tcode];
21909 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21910 tree rtype, atype, args = void_list_node;
21913 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21914 for (i = after - 1; i > start; --i)
21916 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21917 args = tree_cons (NULL, atype, args);
21920 type = build_function_type (rtype, args);
21924 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21925 enum ix86_builtin_func_type icode;
21927 icode = ix86_builtin_func_alias_base[index];
21928 type = ix86_get_builtin_func_type (icode);
21931 ix86_builtin_func_type_tab[(int) tcode] = type;
21936 /* Codes for all the SSE/MMX builtins. */
21939 IX86_BUILTIN_ADDPS,
21940 IX86_BUILTIN_ADDSS,
21941 IX86_BUILTIN_DIVPS,
21942 IX86_BUILTIN_DIVSS,
21943 IX86_BUILTIN_MULPS,
21944 IX86_BUILTIN_MULSS,
21945 IX86_BUILTIN_SUBPS,
21946 IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;                           /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a builtin
   is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
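
/* Illustrative sketch, not part of the original file: roughly how the
   description tables below get registered elsewhere in this file, shown
   here for the SSE addps entry of bdesc_args.  When SSE is not in
   ix86_isa_flags, def_builtin defers the declaration and const_p is
   recorded for ix86_add_new_builtins to apply later.  */

static void ATTRIBUTE_UNUSED
ix86_example_register_addps (void)
{
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
}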

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
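
/* Illustrative sketch, not part of the original file: when an ISA bit
   is turned on after startup (for instance by a target attribute or
   pragma switching one function to SSE4.2), the deferred declarations
   are materialized like this.  */

static void ATTRIBUTE_UNUSED
ix86_example_enable_sse4_2_builtins (void)
{
  /* Declares every builtin recorded with set_and_not_built_p whose
     recorded isa mask intersects OPTION_MASK_ISA_SSE4_2.  */
  ix86_add_new_builtins (OPTION_MASK_ISA_SSE4_2);
}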

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
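
/* For example, cmpgtps has no dedicated pattern of its own below:
   bdesc_args encodes __builtin_ia32_cmpgtps as the LT comparison with
   a *_SWAP function type, so the expander emits cmpltps with the two
   operands exchanged.  */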

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
23180 /* Builtins with variable number of arguments. */
23181 static const struct builtin_description bdesc_args[] =
23183 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23184 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23185 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23186 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23187 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23188 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23189 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
23228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
23230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
23232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
23236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23278 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23279 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23280 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23281 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23282 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23283 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
23287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23288 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23290 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
23295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
23299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23301 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23302 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23303 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
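
  /* SSE2 */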
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
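
  /* SSE2 MMX */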
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
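
  /* SSE3 */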
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
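
  /* SSSE3 */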
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
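
  /* SSSE3 palignr.  The shift-count immediate is taken in bits; the
     tmmintrin.h wrappers scale their byte count by 8 before calling
     these builtins.  */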
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
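
  /* SSE4.1 */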
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
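
  /* SSE4.1 round and ptest.  */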
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
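
  /* ptest sets only ZF and CF.  The three builtins below share one
     pattern and read the flags back as comparisons: EQ tests ZF
     (ptestz), LTU tests CF (ptestc), and GTU tests that both flags
     are clear (ptestnzc).  */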
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
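
  /* SSE4.2 */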
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
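
  /* SSE4A */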
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
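
  /* AES */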
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
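
  /* PCLMUL */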
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
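
  /* AVX */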
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
23803 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
23806 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
23807 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
23808 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
23809 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
23810 };
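/* Illustrative sketch: each row of these tables binds one insn
   pattern to one user-level builtin via its ix86_builtin_func_type.
   With -mf16c the vcvtps2ph row above surfaces roughly as

       __v8hi h = __builtin_ia32_vcvtps2ph (f, 0);

   where f is a __v4sf and the immediate selects the rounding mode
   (V8HI_FTYPE_V4SF_INT).  The builtin name and signature come from
   the table; the call itself is hypothetical.  */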
23812 /* FMA4 and XOP. */
23813 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
23814 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
23815 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
23816 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
23817 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
23818 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
23819 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
23820 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
23821 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
23822 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
23823 #define MULTI_ARG_3_SI_DI V2DI_FTYPE_V4SI_V4SI_V2DI
23824 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
23825 #define MULTI_ARG_3_HI_SI V4SI_FTYPE_V8HI_V8HI_V4SI
23826 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
23827 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
23828 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
23829 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
23830 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
23831 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
23832 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
23833 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
23834 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
23835 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
23836 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
23837 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
23838 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
23839 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
23840 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
23841 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
23842 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
23843 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
23844 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
23845 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
23846 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
23847 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
23848 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
23849 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
23850 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
23851 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
23852 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
23853 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
23854 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
23855 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
23856 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
23857 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
23858 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
23859 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
23860 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
23861 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
23862 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
23863 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
23864 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
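/* Reading the aliases above: MULTI_ARG_<nargs>_<modes> encodes the
   operand count and the vector element modes, with a trailing 2 for
   the 256-bit variants and _CMP/_TF for forms that carry an extra
   comparison or true/false code.  A sketch of one decode (purely from
   the defines above):

       MULTI_ARG_2_DI_IMM == V2DI_FTYPE_V2DI_SI

   i.e. a function taking a V2DI vector plus an int immediate and
   returning V2DI, as used by the vprotqi rotate below.  */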
23866 static const struct builtin_description bdesc_multi_arg[] =
23867 {
23868 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23869 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23870 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23871 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23872 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23873 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23874 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23875 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23877 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23878 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23879 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23880 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23881 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23882 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23883 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23884 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23886 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23887 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23888 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23889 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23891 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23892 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23893 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23894 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23896 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23897 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23898 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23899 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23901 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23902 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23903 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23904 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23906 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
23907 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
23908 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
23909 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
23910 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
23911 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
23912 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
23914 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23915 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23916 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
23917 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
23918 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
23919 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23920 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23922 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
23924 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23925 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23926 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23927 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23928 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23929 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23930 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23931 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23932 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23933 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23934 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23935 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23937 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23938 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
23939 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
23940 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
23941 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
23942 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
23943 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
23944 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
23945 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23946 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
23947 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
23948 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
23949 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23950 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
23951 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
23952 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23954 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23955 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23959 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23962 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23967 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23968 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23970 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23971 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23972 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23982 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23986 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23990 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
24001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24002 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
24005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
24006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
24007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
24009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
24010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
24013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
24014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
24015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
24017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
24018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
24021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
24022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
24023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
24025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
24026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
24029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
24030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
24031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
24033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
24037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
24038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
24039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
24041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
24060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
24061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
24062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
24063 };
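/* Illustrative sketch: the vpcom* rows above deliberately share one
   icode and differ only in the comparison column, so with -mxop a
   hypothetical call

       __v16qi r = __builtin_ia32_vpcomltb (a, b);

   expands xop_maskcmpv16qi3 with LT, while __builtin_ia32_vpcomgtb
   reuses the same pattern with GT.  */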
24066 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
24067 in the current target ISA to allow the user to compile particular modules
24068 with different target specific options that differ from the command line
24069 options.  */
24070 static void
24071 ix86_init_mmx_sse_builtins (void)
24072 {
24073 const struct builtin_description * d;
24074 enum ix86_builtin_func_type ftype;
24075 size_t i;
24077 /* Add all special builtins with variable number of operands.  */
24078 for (i = 0, d = bdesc_special_args;
24079 i < ARRAY_SIZE (bdesc_special_args);
24080 i++, d++)
24081 {
24082 if (d->name == 0)
24083 continue;
24085 ftype = (enum ix86_builtin_func_type) d->flag;
24086 def_builtin (d->mask, d->name, ftype, d->code);
24087 }
24089 /* Add all builtins with variable number of operands. */
24090 for (i = 0, d = bdesc_args;
24091 i < ARRAY_SIZE (bdesc_args);
24092 i++, d++)
24093 {
24094 if (d->name == 0)
24095 continue;
24097 ftype = (enum ix86_builtin_func_type) d->flag;
24098 def_builtin_const (d->mask, d->name, ftype, d->code);
24099 }
24101 /* pcmpestr[im] insns. */
24102 for (i = 0, d = bdesc_pcmpestr;
24103 i < ARRAY_SIZE (bdesc_pcmpestr);
24104 i++, d++)
24105 {
24106 if (d->code == IX86_BUILTIN_PCMPESTRM128)
24107 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
24108 else
24109 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
24110 def_builtin_const (d->mask, d->name, ftype, d->code);
24111 }
24113 /* pcmpistr[im] insns. */
24114 for (i = 0, d = bdesc_pcmpistr;
24115 i < ARRAY_SIZE (bdesc_pcmpistr);
24116 i++, d++)
24117 {
24118 if (d->code == IX86_BUILTIN_PCMPISTRM128)
24119 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
24120 else
24121 ftype = INT_FTYPE_V16QI_V16QI_INT;
24122 def_builtin_const (d->mask, d->name, ftype, d->code);
24123 }
24125 /* comi/ucomi insns. */
24126 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24127 {
24128 if (d->mask == OPTION_MASK_ISA_SSE2)
24129 ftype = INT_FTYPE_V2DF_V2DF;
24130 else
24131 ftype = INT_FTYPE_V4SF_V4SF;
24132 def_builtin_const (d->mask, d->name, ftype, d->code);
24133 }
24135 /* SSE */
24136 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
24137 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
24138 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
24139 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
24141 /* SSE or 3DNow!A */
24142 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24143 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
24144 IX86_BUILTIN_MASKMOVQ);
24147 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
24148 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
24150 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
24151 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
24152 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
24153 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
24156 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
24157 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
24158 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
24159 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
24162 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
24163 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
24164 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
24165 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
24166 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
24167 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
24168 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
24169 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
24170 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
24171 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
24172 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
24173 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
24176 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
24177 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
24179 /* MMX access to the vec_init patterns. */
24180 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
24181 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
24183 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
24184 V4HI_FTYPE_HI_HI_HI_HI,
24185 IX86_BUILTIN_VEC_INIT_V4HI);
24187 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
24188 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
24189 IX86_BUILTIN_VEC_INIT_V8QI);
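/* Illustrative sketch: these builtins just feed scalars to the
   vec_init patterns, so a hypothetical MMX use is

       __v2si v = __builtin_ia32_vec_init_v2si (1, 2);

   which builds a V2SImode value from two ints, matching the
   V2SI_FTYPE_INT_INT signature registered above.  */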
24191 /* Access to the vec_extract patterns. */
24192 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
24193 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
24194 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
24195 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
24196 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
24197 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
24198 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
24199 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
24200 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
24201 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
24203 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24204 "__builtin_ia32_vec_ext_v4hi",
24205 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
24207 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
24208 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
24210 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
24211 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
24213 /* Access to the vec_set patterns. */
24214 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
24215 "__builtin_ia32_vec_set_v2di",
24216 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
24218 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
24219 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
24221 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
24222 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
24224 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
24225 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
24227 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24228 "__builtin_ia32_vec_set_v4hi",
24229 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
24231 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
24232 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
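/* Illustrative sketch: the vec_ext/vec_set builtins wrap the
   vec_extract and vec_set patterns with an element index, e.g.
   (hypothetical use, names from the registrations above)

       float lane = __builtin_ia32_vec_ext_v4sf (v, 0);     -- SSE
       v = __builtin_ia32_vec_set_v4sf (v, 1.0f, 2);        -- SSE4.1

   The index is expected to be an immediate; the expanders verify it
   against the insn's operand predicate.  */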
24234 /* Add FMA4 and XOP multi-argument instructions.  */
24235 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24236 {
24237 if (d->name == 0)
24238 continue;
24240 ftype = (enum ix86_builtin_func_type) d->flag;
24241 def_builtin_const (d->mask, d->name, ftype, d->code);
24242 }
24243 }
24245 /* Internal method for ix86_init_builtins.  */
24247 static void
24248 ix86_init_builtins_va_builtins_abi (void)
24249 {
24250 tree ms_va_ref, sysv_va_ref;
24251 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24252 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24253 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24254 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
24256 if (!TARGET_64BIT)
24257 return;
24258 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24259 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24260 ms_va_ref = build_reference_type (ms_va_list_type_node);
24261 sysv_va_ref =
24262 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
24264 fnvoid_va_end_ms =
24265 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24266 fnvoid_va_start_ms =
24267 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24268 fnvoid_va_end_sysv =
24269 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24270 fnvoid_va_start_sysv =
24271 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24272 NULL_TREE);
24273 fnvoid_va_copy_ms =
24274 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24275 NULL_TREE);
24276 fnvoid_va_copy_sysv =
24277 build_function_type_list (void_type_node, sysv_va_ref,
24278 sysv_va_ref, NULL_TREE);
24280 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24281 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24282 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24283 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24284 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24285 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24286 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24287 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24288 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24289 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24290 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24291 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24292 }
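/* Illustrative sketch: on 64-bit targets the registrations above let
   ABI-mixing code handle both va_list flavors, e.g. hypothetically

       void f (int n, ...)
       {
         __builtin_ms_va_list ap;
         __builtin_ms_va_start (ap, n);
         ...
         __builtin_ms_va_end (ap);
       }

   with the ms_abi/sysv_abi attributes attached above keeping the two
   calling conventions apart.  */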
24294 static void
24295 ix86_init_builtin_types (void)
24296 {
24297 tree float128_type_node, float80_type_node;
24299 /* The __float80 type. */
24300 float80_type_node = long_double_type_node;
24301 if (TYPE_MODE (float80_type_node) != XFmode)
24302 {
24303 /* The __float80 type.  */
24304 float80_type_node = make_node (REAL_TYPE);
24306 TYPE_PRECISION (float80_type_node) = 80;
24307 layout_type (float80_type_node);
24308 }
24309 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
24311 /* The __float128 type. */
24312 float128_type_node = make_node (REAL_TYPE);
24313 TYPE_PRECISION (float128_type_node) = 128;
24314 layout_type (float128_type_node);
24315 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
24317 /* This macro is built by i386-builtin-types.awk. */
24318 DEFINE_BUILTIN_PRIMITIVE_TYPES;
24319 }
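/* Illustrative sketch: once registered, user code can name the
   extended types directly, e.g.

       __float80  x = 1.0w;   (80-bit, XFmode)
       __float128 y = 1.0q;   (128-bit, TFmode)

   The w/q literal suffixes are the GCC extensions usually paired
   with these types; exact suffix support depends on the front end.  */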
24321 static void
24322 ix86_init_builtins (void)
24323 {
24324 tree t;
24326 ix86_init_builtin_types ();
24328 /* TFmode support builtins. */
24329 def_builtin_const (0, "__builtin_infq",
24330 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
24331 def_builtin_const (0, "__builtin_huge_valq",
24332 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
24334 /* We will expand them to a normal call if SSE2 isn't available since
24335 they are used by libgcc. */
24336 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
24337 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
24338 BUILT_IN_MD, "__fabstf2", NULL_TREE);
24339 TREE_READONLY (t) = 1;
24340 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
24342 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
24343 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
24344 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
24345 TREE_READONLY (t) = 1;
24346 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
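/* Illustrative sketch: because __builtin_fabsq and
   __builtin_copysignq are registered with the library names
   __fabstf2 and __copysigntf3, a hypothetical

       __float128 z = __builtin_copysignq (x, y);

   either expands inline or, per the comment above, falls back to the
   libgcc routine when SSE2 is unavailable.  */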
24348 ix86_init_mmx_sse_builtins ();
24350 if (TARGET_64BIT)
24351 ix86_init_builtins_va_builtins_abi ();
24352 }
24354 /* Return the ix86 builtin for CODE. */
24356 static tree
24357 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24358 {
24359 if (code >= IX86_BUILTIN_MAX)
24360 return error_mark_node;
24362 return ix86_builtins[code];
24363 }
24365 /* Errors in the source file can cause expand_expr to return const0_rtx
24366 where we expect a vector. To avoid crashing, use one of the vector
24367 clear instructions. */
24368 static rtx
24369 safe_vector_operand (rtx x, enum machine_mode mode)
24370 {
24371 if (x == const0_rtx)
24372 x = CONST0_RTX (mode);
24373 return x;
24374 }
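/* Illustrative sketch of the fixup above: const0_rtx is the scalar
   integer zero, so for a V4SImode operand the replacement yields
   CONST0_RTX (V4SImode), a zero CONST_VECTOR that vector insn
   predicates accept instead of an out-of-mode scalar.  */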
24376 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24378 static rtx
24379 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24380 {
24381 rtx pat;
24382 tree arg0 = CALL_EXPR_ARG (exp, 0);
24383 tree arg1 = CALL_EXPR_ARG (exp, 1);
24384 rtx op0 = expand_normal (arg0);
24385 rtx op1 = expand_normal (arg1);
24386 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24387 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24388 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24390 if (VECTOR_MODE_P (mode0))
24391 op0 = safe_vector_operand (op0, mode0);
24392 if (VECTOR_MODE_P (mode1))
24393 op1 = safe_vector_operand (op1, mode1);
24395 if (optimize || !target
24396 || GET_MODE (target) != tmode
24397 || !insn_data[icode].operand[0].predicate (target, tmode))
24398 target = gen_reg_rtx (tmode);
24400 if (GET_MODE (op1) == SImode && mode1 == TImode)
24401 {
24402 rtx x = gen_reg_rtx (V4SImode);
24403 emit_insn (gen_sse2_loadd (x, op1));
24404 op1 = gen_lowpart (TImode, x);
24405 }
24407 if (!insn_data[icode].operand[1].predicate (op0, mode0))
24408 op0 = copy_to_mode_reg (mode0, op0);
24409 if (!insn_data[icode].operand[2].predicate (op1, mode1))
24410 op1 = copy_to_mode_reg (mode1, op1);
24412 pat = GEN_FCN (icode) (target, op0, op1);
24413 if (! pat)
24414 return 0;
24416 emit_insn (pat);
24418 return target;
24419 }
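/* Illustrative sketch: for a two-operand table entry such as a
   hypothetical __builtin_ia32_paddw128 this emits a single pattern
   of the shape

       (set (reg:V8HI target)
            (plus:V8HI (reg:V8HI op0) (reg:V8HI op1)))

   after forcing any operand that fails its predicate into a
   register.  */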
24421 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24423 static rtx
24424 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24425 enum ix86_builtin_func_type m_type,
24426 enum rtx_code sub_code)
24427 {
24428 rtx pat;
24429 unsigned int i, nargs;
24431 bool comparison_p = false;
24432 bool tf_p = false;
24433 bool last_arg_constant = false;
24434 int num_memory = 0;
24435 struct {
24436 rtx op;
24437 enum machine_mode mode;
24438 } args[4];
24440 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24442 switch (m_type)
24443 {
24444 case MULTI_ARG_4_DF2_DI_I:
24445 case MULTI_ARG_4_DF2_DI_I1:
24446 case MULTI_ARG_4_SF2_SI_I:
24447 case MULTI_ARG_4_SF2_SI_I1:
24448 nargs = 4;
24449 last_arg_constant = true;
24450 break;
24452 case MULTI_ARG_3_SF:
24453 case MULTI_ARG_3_DF:
24454 case MULTI_ARG_3_SF2:
24455 case MULTI_ARG_3_DF2:
24456 case MULTI_ARG_3_DI:
24457 case MULTI_ARG_3_SI:
24458 case MULTI_ARG_3_SI_DI:
24459 case MULTI_ARG_3_HI:
24460 case MULTI_ARG_3_HI_SI:
24461 case MULTI_ARG_3_QI:
24462 case MULTI_ARG_3_DI2:
24463 case MULTI_ARG_3_SI2:
24464 case MULTI_ARG_3_HI2:
24465 case MULTI_ARG_3_QI2:
24466 nargs = 3;
24467 break;
24469 case MULTI_ARG_2_SF:
24470 case MULTI_ARG_2_DF:
24471 case MULTI_ARG_2_DI:
24472 case MULTI_ARG_2_SI:
24473 case MULTI_ARG_2_HI:
24474 case MULTI_ARG_2_QI:
24475 nargs = 2;
24476 break;
24478 case MULTI_ARG_2_DI_IMM:
24479 case MULTI_ARG_2_SI_IMM:
24480 case MULTI_ARG_2_HI_IMM:
24481 case MULTI_ARG_2_QI_IMM:
24482 nargs = 2;
24483 last_arg_constant = true;
24484 break;
24486 case MULTI_ARG_1_SF:
24487 case MULTI_ARG_1_DF:
24488 case MULTI_ARG_1_SF2:
24489 case MULTI_ARG_1_DF2:
24490 case MULTI_ARG_1_DI:
24491 case MULTI_ARG_1_SI:
24492 case MULTI_ARG_1_HI:
24493 case MULTI_ARG_1_QI:
24494 case MULTI_ARG_1_SI_DI:
24495 case MULTI_ARG_1_HI_DI:
24496 case MULTI_ARG_1_HI_SI:
24497 case MULTI_ARG_1_QI_DI:
24498 case MULTI_ARG_1_QI_SI:
24499 case MULTI_ARG_1_QI_HI:
24500 nargs = 1;
24501 break;
24503 case MULTI_ARG_2_DI_CMP:
24504 case MULTI_ARG_2_SI_CMP:
24505 case MULTI_ARG_2_HI_CMP:
24506 case MULTI_ARG_2_QI_CMP:
24507 nargs = 2;
24508 comparison_p = true;
24509 break;
24511 case MULTI_ARG_2_SF_TF:
24512 case MULTI_ARG_2_DF_TF:
24513 case MULTI_ARG_2_DI_TF:
24514 case MULTI_ARG_2_SI_TF:
24515 case MULTI_ARG_2_HI_TF:
24516 case MULTI_ARG_2_QI_TF:
24517 nargs = 2;
24518 tf_p = true;
24519 break;
24521 default:
24522 gcc_unreachable ();
24523 }
24525 if (optimize || !target
24526 || GET_MODE (target) != tmode
24527 || !insn_data[icode].operand[0].predicate (target, tmode))
24528 target = gen_reg_rtx (tmode);
24530 gcc_assert (nargs <= 4);
24532 for (i = 0; i < nargs; i++)
24534 tree arg = CALL_EXPR_ARG (exp, i);
24535 rtx op = expand_normal (arg);
24536 int adjust = (comparison_p) ? 1 : 0;
24537 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24539 if (last_arg_constant && i == nargs-1)
24540 {
24541 if (!CONST_INT_P (op))
24542 {
24543 error ("last argument must be an immediate");
24544 return gen_reg_rtx (tmode);
24545 }
24546 }
24547 else
24548 {
24549 if (VECTOR_MODE_P (mode))
24550 op = safe_vector_operand (op, mode);
24552 /* If we aren't optimizing, only allow one memory operand to be
24553 generated.  */
24554 if (memory_operand (op, mode))
24555 num_memory++;
24557 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24559 if (optimize
24560 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24561 || num_memory > 1)
24562 op = force_reg (mode, op);
24563 }
24565 args[i].op = op;
24566 args[i].mode = mode;
24567 }
24568 switch (nargs)
24569 {
24570 case 1:
24572 pat = GEN_FCN (icode) (target, args[0].op);
24573 break;
24575 case 2:
24576 if (tf_p)
24577 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24578 GEN_INT ((int)sub_code));
24579 else if (! comparison_p)
24580 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24581 else
24582 {
24583 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24584 args[0].op,
24585 args[1].op);
24587 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24588 }
24589 break;
24591 case 3:
24592 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24593 break;
24595 case 4:
24596 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24597 break;
24599 default:
24600 gcc_unreachable ();
24601 }
24603 if (! pat)
24604 return 0;
24606 emit_insn (pat);
24607 return target;
24608 }
24610 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24611 insns with vec_merge. */
24613 static rtx
24614 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24615 rtx target)
24616 {
24617 rtx pat;
24618 tree arg0 = CALL_EXPR_ARG (exp, 0);
24619 rtx op1, op0 = expand_normal (arg0);
24620 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24621 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24623 if (optimize || !target
24624 || GET_MODE (target) != tmode
24625 || !insn_data[icode].operand[0].predicate (target, tmode))
24626 target = gen_reg_rtx (tmode);
24628 if (VECTOR_MODE_P (mode0))
24629 op0 = safe_vector_operand (op0, mode0);
24631 if ((optimize && !register_operand (op0, mode0))
24632 || !insn_data[icode].operand[1].predicate (op0, mode0))
24633 op0 = copy_to_mode_reg (mode0, op0);
24635 op1 = op0;
24636 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24637 op1 = copy_to_mode_reg (mode0, op1);
24639 pat = GEN_FCN (icode) (target, op0, op1);
24640 if (! pat)
24641 return 0;
24642 emit_insn (pat);
24644 return target;
24645 }
24646 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24648 static rtx
24649 ix86_expand_sse_compare (const struct builtin_description *d,
24650 tree exp, rtx target, bool swap)
24651 {
24652 rtx pat;
24653 tree arg0 = CALL_EXPR_ARG (exp, 0);
24654 tree arg1 = CALL_EXPR_ARG (exp, 1);
24655 rtx op0 = expand_normal (arg0);
24656 rtx op1 = expand_normal (arg1);
24657 rtx op2;
24658 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24659 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24660 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24661 enum rtx_code comparison = d->comparison;
24663 if (VECTOR_MODE_P (mode0))
24664 op0 = safe_vector_operand (op0, mode0);
24665 if (VECTOR_MODE_P (mode1))
24666 op1 = safe_vector_operand (op1, mode1);
24668 /* Swap operands if we have a comparison that isn't available in
24669 SSE.  */
24670 if (swap)
24671 {
24672 rtx tmp = gen_reg_rtx (mode1);
24673 emit_move_insn (tmp, op1);
24674 op1 = op0;
24675 op0 = tmp;
24676 }
24678 if (optimize || !target
24679 || GET_MODE (target) != tmode
24680 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24681 target = gen_reg_rtx (tmode);
24683 if ((optimize && !register_operand (op0, mode0))
24684 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24685 op0 = copy_to_mode_reg (mode0, op0);
24686 if ((optimize && !register_operand (op1, mode1))
24687 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24688 op1 = copy_to_mode_reg (mode1, op1);
24690 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24691 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24692 if (! pat)
24693 return 0;
24694 emit_insn (pat);
24696 return target;
24697 }
24698 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24700 static rtx
24701 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24702 rtx target)
24703 {
24704 rtx pat;
24705 tree arg0 = CALL_EXPR_ARG (exp, 0);
24706 tree arg1 = CALL_EXPR_ARG (exp, 1);
24707 rtx op0 = expand_normal (arg0);
24708 rtx op1 = expand_normal (arg1);
24709 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24710 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24711 enum rtx_code comparison = d->comparison;
24713 if (VECTOR_MODE_P (mode0))
24714 op0 = safe_vector_operand (op0, mode0);
24715 if (VECTOR_MODE_P (mode1))
24716 op1 = safe_vector_operand (op1, mode1);
24718 /* Swap operands if we have a comparison that isn't available in
24719 SSE.  */
24720 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24721 {
24722 rtx tmp = op1;
24723 op1 = op0;
24724 op0 = tmp;
24725 }
24727 target = gen_reg_rtx (SImode);
24728 emit_move_insn (target, const0_rtx);
24729 target = gen_rtx_SUBREG (QImode, target, 0);
24731 if ((optimize && !register_operand (op0, mode0))
24732 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24733 op0 = copy_to_mode_reg (mode0, op0);
24734 if ((optimize && !register_operand (op1, mode1))
24735 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24736 op1 = copy_to_mode_reg (mode1, op1);
24738 pat = GEN_FCN (d->icode) (op0, op1);
24739 if (! pat)
24740 return 0;
24741 emit_insn (pat);
24742 emit_insn (gen_rtx_SET (VOIDmode,
24743 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24744 gen_rtx_fmt_ee (comparison, QImode,
24745 SET_DEST (pat),
24746 const0_rtx)));
24748 return SUBREG_REG (target);
24749 }
24751 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24753 static rtx
24754 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24755 rtx target)
24756 {
24757 rtx pat;
24758 tree arg0 = CALL_EXPR_ARG (exp, 0);
24759 tree arg1 = CALL_EXPR_ARG (exp, 1);
24760 rtx op0 = expand_normal (arg0);
24761 rtx op1 = expand_normal (arg1);
24762 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24763 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24764 enum rtx_code comparison = d->comparison;
24766 if (VECTOR_MODE_P (mode0))
24767 op0 = safe_vector_operand (op0, mode0);
24768 if (VECTOR_MODE_P (mode1))
24769 op1 = safe_vector_operand (op1, mode1);
24771 target = gen_reg_rtx (SImode);
24772 emit_move_insn (target, const0_rtx);
24773 target = gen_rtx_SUBREG (QImode, target, 0);
24775 if ((optimize && !register_operand (op0, mode0))
24776 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24777 op0 = copy_to_mode_reg (mode0, op0);
24778 if ((optimize && !register_operand (op1, mode1))
24779 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24780 op1 = copy_to_mode_reg (mode1, op1);
24782 pat = GEN_FCN (d->icode) (op0, op1);
24783 if (! pat)
24784 return 0;
24785 emit_insn (pat);
24786 emit_insn (gen_rtx_SET (VOIDmode,
24787 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24788 gen_rtx_fmt_ee (comparison, QImode,
24789 SET_DEST (pat),
24790 const0_rtx)));
24792 return SUBREG_REG (target);
24793 }
24795 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24797 static rtx
24798 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24799 tree exp, rtx target)
24800 {
24801 rtx pat;
24802 tree arg0 = CALL_EXPR_ARG (exp, 0);
24803 tree arg1 = CALL_EXPR_ARG (exp, 1);
24804 tree arg2 = CALL_EXPR_ARG (exp, 2);
24805 tree arg3 = CALL_EXPR_ARG (exp, 3);
24806 tree arg4 = CALL_EXPR_ARG (exp, 4);
24807 rtx scratch0, scratch1;
24808 rtx op0 = expand_normal (arg0);
24809 rtx op1 = expand_normal (arg1);
24810 rtx op2 = expand_normal (arg2);
24811 rtx op3 = expand_normal (arg3);
24812 rtx op4 = expand_normal (arg4);
24813 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24815 tmode0 = insn_data[d->icode].operand[0].mode;
24816 tmode1 = insn_data[d->icode].operand[1].mode;
24817 modev2 = insn_data[d->icode].operand[2].mode;
24818 modei3 = insn_data[d->icode].operand[3].mode;
24819 modev4 = insn_data[d->icode].operand[4].mode;
24820 modei5 = insn_data[d->icode].operand[5].mode;
24821 modeimm = insn_data[d->icode].operand[6].mode;
24823 if (VECTOR_MODE_P (modev2))
24824 op0 = safe_vector_operand (op0, modev2);
24825 if (VECTOR_MODE_P (modev4))
24826 op2 = safe_vector_operand (op2, modev4);
24828 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24829 op0 = copy_to_mode_reg (modev2, op0);
24830 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24831 op1 = copy_to_mode_reg (modei3, op1);
24832 if ((optimize && !register_operand (op2, modev4))
24833 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24834 op2 = copy_to_mode_reg (modev4, op2);
24835 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24836 op3 = copy_to_mode_reg (modei5, op3);
24838 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24839 {
24840 error ("the fifth argument must be an 8-bit immediate");
24841 return const0_rtx;
24842 }
24844 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24845 {
24846 if (optimize || !target
24847 || GET_MODE (target) != tmode0
24848 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24849 target = gen_reg_rtx (tmode0);
24851 scratch1 = gen_reg_rtx (tmode1);
24853 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24854 }
24855 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24856 {
24857 if (optimize || !target
24858 || GET_MODE (target) != tmode1
24859 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24860 target = gen_reg_rtx (tmode1);
24862 scratch0 = gen_reg_rtx (tmode0);
24864 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24865 }
24866 else
24867 {
24868 gcc_assert (d->flag);
24870 scratch0 = gen_reg_rtx (tmode0);
24871 scratch1 = gen_reg_rtx (tmode1);
24873 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24874 }
24876 if (! pat)
24877 return 0;
24879 emit_insn (pat);
24881 if (d->flag)
24882 {
24883 target = gen_reg_rtx (SImode);
24884 emit_move_insn (target, const0_rtx);
24885 target = gen_rtx_SUBREG (QImode, target, 0);
24887 emit_insn
24888 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24889 gen_rtx_fmt_ee (EQ, QImode,
24890 gen_rtx_REG ((enum machine_mode) d->flag,
24891 FLAGS_REG),
24892 const0_rtx)));
24893 return SUBREG_REG (target);
24894 }
24895 else
24896 return target;
24897 }
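/* Illustrative sketch: for the flag-reading forms (d->flag nonzero,
   e.g. the pcmpestr?128 variants that test ZF/CF) the code above
   materializes the flags-register comparison into a QImode pseudo,
   so the user-visible result is an int 0/1 rather than the raw
   %ecx/%xmm0 output.  The exact builtin names are assumptions based
   on the bdesc_pcmpestr table referenced earlier.  */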
24900 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24902 static rtx
24903 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24904 tree exp, rtx target)
24905 {
24906 rtx pat;
24907 tree arg0 = CALL_EXPR_ARG (exp, 0);
24908 tree arg1 = CALL_EXPR_ARG (exp, 1);
24909 tree arg2 = CALL_EXPR_ARG (exp, 2);
24910 rtx scratch0, scratch1;
24911 rtx op0 = expand_normal (arg0);
24912 rtx op1 = expand_normal (arg1);
24913 rtx op2 = expand_normal (arg2);
24914 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24916 tmode0 = insn_data[d->icode].operand[0].mode;
24917 tmode1 = insn_data[d->icode].operand[1].mode;
24918 modev2 = insn_data[d->icode].operand[2].mode;
24919 modev3 = insn_data[d->icode].operand[3].mode;
24920 modeimm = insn_data[d->icode].operand[4].mode;
24922 if (VECTOR_MODE_P (modev2))
24923 op0 = safe_vector_operand (op0, modev2);
24924 if (VECTOR_MODE_P (modev3))
24925 op1 = safe_vector_operand (op1, modev3);
24927 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24928 op0 = copy_to_mode_reg (modev2, op0);
24929 if ((optimize && !register_operand (op1, modev3))
24930 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24931 op1 = copy_to_mode_reg (modev3, op1);
24933 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24934 {
24935 error ("the third argument must be an 8-bit immediate");
24936 return const0_rtx;
24937 }
24939 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24940 {
24941 if (optimize || !target
24942 || GET_MODE (target) != tmode0
24943 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24944 target = gen_reg_rtx (tmode0);
24946 scratch1 = gen_reg_rtx (tmode1);
24948 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24949 }
24950 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24951 {
24952 if (optimize || !target
24953 || GET_MODE (target) != tmode1
24954 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24955 target = gen_reg_rtx (tmode1);
24957 scratch0 = gen_reg_rtx (tmode0);
24959 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24960 }
24961 else
24962 {
24963 gcc_assert (d->flag);
24965 scratch0 = gen_reg_rtx (tmode0);
24966 scratch1 = gen_reg_rtx (tmode1);
24968 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24969 }
24971 if (! pat)
24972 return 0;
24974 emit_insn (pat);
24976 if (d->flag)
24977 {
24978 target = gen_reg_rtx (SImode);
24979 emit_move_insn (target, const0_rtx);
24980 target = gen_rtx_SUBREG (QImode, target, 0);
24983 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24984 gen_rtx_fmt_ee (EQ, QImode,
24985 gen_rtx_REG ((enum machine_mode) d->flag,
24988 return SUBREG_REG (target);
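
/* Illustrative usage sketch (not part of this file): the implicit-length
   variants arrive through _mm_cmpistri and friends; the d->flag path just
   above is what implements the flag-output forms such as _mm_cmpistrc.
   The helper name below is ours.  */
#if 0
#include <smmintrin.h>

/* Return nonzero if some byte of TEXT equals any byte of SET
   (zero bytes act as terminators in the implicit-length forms).  */
static int
any_byte_in_set (__m128i set, __m128i text)
{
  return _mm_cmpistrc (set, text, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}
#endif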
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
25020 case INT_FTYPE_V8SF_V8SF_PTEST:
25021 case INT_FTYPE_V4DI_V4DI_PTEST:
25022 case INT_FTYPE_V4DF_V4DF_PTEST:
25023 case INT_FTYPE_V4SF_V4SF_PTEST:
25024 case INT_FTYPE_V2DI_V2DI_PTEST:
25025 case INT_FTYPE_V2DF_V2DF_PTEST:
25026 return ix86_expand_sse_ptest (d, exp, target);
25027 case FLOAT128_FTYPE_FLOAT128:
25028 case FLOAT_FTYPE_FLOAT:
25029 case INT_FTYPE_INT:
25030 case UINT64_FTYPE_INT:
25031 case UINT16_FTYPE_UINT16:
25032 case INT64_FTYPE_INT64:
25033 case INT64_FTYPE_V4SF:
25034 case INT64_FTYPE_V2DF:
25035 case INT_FTYPE_V16QI:
25036 case INT_FTYPE_V8QI:
25037 case INT_FTYPE_V8SF:
25038 case INT_FTYPE_V4DF:
25039 case INT_FTYPE_V4SF:
25040 case INT_FTYPE_V2DF:
25041 case V16QI_FTYPE_V16QI:
25042 case V8SI_FTYPE_V8SF:
25043 case V8SI_FTYPE_V4SI:
25044 case V8HI_FTYPE_V8HI:
25045 case V8HI_FTYPE_V16QI:
25046 case V8QI_FTYPE_V8QI:
25047 case V8SF_FTYPE_V8SF:
25048 case V8SF_FTYPE_V8SI:
25049 case V8SF_FTYPE_V4SF:
25050 case V8SF_FTYPE_V8HI:
25051 case V4SI_FTYPE_V4SI:
25052 case V4SI_FTYPE_V16QI:
25053 case V4SI_FTYPE_V4SF:
25054 case V4SI_FTYPE_V8SI:
25055 case V4SI_FTYPE_V8HI:
25056 case V4SI_FTYPE_V4DF:
25057 case V4SI_FTYPE_V2DF:
25058 case V4HI_FTYPE_V4HI:
25059 case V4DF_FTYPE_V4DF:
25060 case V4DF_FTYPE_V4SI:
25061 case V4DF_FTYPE_V4SF:
25062 case V4DF_FTYPE_V2DF:
25063 case V4SF_FTYPE_V4SF:
25064 case V4SF_FTYPE_V4SI:
25065 case V4SF_FTYPE_V8SF:
25066 case V4SF_FTYPE_V4DF:
25067 case V4SF_FTYPE_V8HI:
25068 case V4SF_FTYPE_V2DF:
25069 case V2DI_FTYPE_V2DI:
25070 case V2DI_FTYPE_V16QI:
25071 case V2DI_FTYPE_V8HI:
25072 case V2DI_FTYPE_V4SI:
25073 case V2DF_FTYPE_V2DF:
25074 case V2DF_FTYPE_V4SI:
25075 case V2DF_FTYPE_V4DF:
25076 case V2DF_FTYPE_V4SF:
25077 case V2DF_FTYPE_V2SI:
25078 case V2SI_FTYPE_V2SI:
25079 case V2SI_FTYPE_V4SF:
25080 case V2SI_FTYPE_V2SF:
25081 case V2SI_FTYPE_V2DF:
25082 case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
25087 case V2DF_FTYPE_V2DF_VEC_MERGE:
25088 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25089 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25090 case V16QI_FTYPE_V16QI_V16QI:
25091 case V16QI_FTYPE_V8HI_V8HI:
25092 case V8QI_FTYPE_V8QI_V8QI:
25093 case V8QI_FTYPE_V4HI_V4HI:
25094 case V8HI_FTYPE_V8HI_V8HI:
25095 case V8HI_FTYPE_V16QI_V16QI:
25096 case V8HI_FTYPE_V4SI_V4SI:
25097 case V8SF_FTYPE_V8SF_V8SF:
25098 case V8SF_FTYPE_V8SF_V8SI:
25099 case V4SI_FTYPE_V4SI_V4SI:
25100 case V4SI_FTYPE_V8HI_V8HI:
25101 case V4SI_FTYPE_V4SF_V4SF:
25102 case V4SI_FTYPE_V2DF_V2DF:
25103 case V4HI_FTYPE_V4HI_V4HI:
25104 case V4HI_FTYPE_V8QI_V8QI:
25105 case V4HI_FTYPE_V2SI_V2SI:
25106 case V4DF_FTYPE_V4DF_V4DF:
25107 case V4DF_FTYPE_V4DF_V4DI:
25108 case V4SF_FTYPE_V4SF_V4SF:
25109 case V4SF_FTYPE_V4SF_V4SI:
25110 case V4SF_FTYPE_V4SF_V2SI:
25111 case V4SF_FTYPE_V4SF_V2DF:
25112 case V4SF_FTYPE_V4SF_DI:
25113 case V4SF_FTYPE_V4SF_SI:
25114 case V2DI_FTYPE_V2DI_V2DI:
25115 case V2DI_FTYPE_V16QI_V16QI:
25116 case V2DI_FTYPE_V4SI_V4SI:
25117 case V2DI_FTYPE_V2DI_V16QI:
25118 case V2DI_FTYPE_V2DF_V2DF:
25119 case V2SI_FTYPE_V2SI_V2SI:
25120 case V2SI_FTYPE_V4HI_V4HI:
25121 case V2SI_FTYPE_V2SF_V2SF:
25122 case V2DF_FTYPE_V2DF_V2DF:
25123 case V2DF_FTYPE_V2DF_V4SF:
25124 case V2DF_FTYPE_V2DF_V2DI:
25125 case V2DF_FTYPE_V2DF_DI:
25126 case V2DF_FTYPE_V2DF_SI:
25127 case V2SF_FTYPE_V2SF_V2SF:
25128 case V1DI_FTYPE_V1DI_V1DI:
25129 case V1DI_FTYPE_V8QI_V8QI:
25130 case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
25141 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25142 case V8HI_FTYPE_V8HI_SI_COUNT:
25143 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25144 case V4SI_FTYPE_V4SI_SI_COUNT:
25145 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25146 case V4HI_FTYPE_V4HI_SI_COUNT:
25147 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25148 case V2DI_FTYPE_V2DI_SI_COUNT:
25149 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25150 case V2SI_FTYPE_V2SI_SI_COUNT:
25151 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25152 case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
25156 case UINT64_FTYPE_UINT64_UINT64:
25157 case UINT_FTYPE_UINT_UINT:
25158 case UINT_FTYPE_UINT_USHORT:
25159 case UINT_FTYPE_UINT_UCHAR:
25160 case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
25169 case V8HI_FTYPE_V8HI_INT:
25170 case V8HI_FTYPE_V8SF_INT:
25171 case V8HI_FTYPE_V4SF_INT:
25172 case V8SF_FTYPE_V8SF_INT:
25173 case V4SI_FTYPE_V4SI_INT:
25174 case V4SI_FTYPE_V8SI_INT:
25175 case V4HI_FTYPE_V4HI_INT:
25176 case V4DF_FTYPE_V4DF_INT:
25177 case V4SF_FTYPE_V4SF_INT:
25178 case V4SF_FTYPE_V8SF_INT:
25179 case V2DI_FTYPE_V2DI_INT:
25180 case V2DF_FTYPE_V2DF_INT:
25181 case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
25185 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25186 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25187 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25188 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25189 case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
25192 case V16QI_FTYPE_V16QI_V16QI_INT:
25193 case V8HI_FTYPE_V8HI_V8HI_INT:
25194 case V8SI_FTYPE_V8SI_V8SI_INT:
25195 case V8SI_FTYPE_V8SI_V4SI_INT:
25196 case V8SF_FTYPE_V8SF_V8SF_INT:
25197 case V8SF_FTYPE_V8SF_V4SF_INT:
25198 case V4SI_FTYPE_V4SI_V4SI_INT:
25199 case V4DF_FTYPE_V4DF_V4DF_INT:
25200 case V4DF_FTYPE_V4DF_V2DF_INT:
25201 case V4SF_FTYPE_V4SF_V4SF_INT:
25202 case V2DI_FTYPE_V2DI_V2DI_INT:
25203 case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
25207 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
25212 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
25217 case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
25221 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25222 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25223 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25224 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
25228 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }
25236 gcc_assert (nargs <= ARRAY_SIZE (args));
  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }
  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }
  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
25283 case CODE_FOR_sse4_1_roundpd:
25284 case CODE_FOR_sse4_1_roundps:
25285 case CODE_FOR_sse4_1_roundsd:
25286 case CODE_FOR_sse4_1_roundss:
25287 case CODE_FOR_sse4_1_blendps:
25288 case CODE_FOR_avx_blendpd256:
25289 case CODE_FOR_avx_vpermilv4df:
25290 case CODE_FOR_avx_roundpd256:
25291 case CODE_FOR_avx_roundps256:
25292 error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

25295 case CODE_FOR_sse4_1_blendpd:
25296 case CODE_FOR_avx_vpermilv2df:
25297 case CODE_FOR_xop_vpermil2v2df3:
25298 case CODE_FOR_xop_vpermil2v4sf3:
25299 case CODE_FOR_xop_vpermil2v4df3:
25300 case CODE_FOR_xop_vpermil2v8sf3:
25301 error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

25304 case CODE_FOR_avx_vextractf128v4df:
25305 case CODE_FOR_avx_vextractf128v8sf:
25306 case CODE_FOR_avx_vextractf128v8si:
25307 case CODE_FOR_avx_vinsertf128v4df:
25308 case CODE_FOR_avx_vinsertf128v8sf:
25309 case CODE_FOR_avx_vinsertf128v8si:
25310 error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

25313 case CODE_FOR_avx_cmpsdv2df3:
25314 case CODE_FOR_avx_cmpssv4sf3:
25315 case CODE_FOR_avx_cmppdv2df3:
25316 case CODE_FOR_avx_cmppsv4sf3:
25317 case CODE_FOR_avx_cmppdv4df3:
25318 case CODE_FOR_avx_cmppsv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}
      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
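
/* Illustrative sketch (not part of this file): a plain two-operand builtin
   such as __builtin_ia32_paddw128 is dispatched through the
   V8HI_FTYPE_V8HI_V8HI case above and lands in ix86_expand_binop_builtin.
   User code normally reaches it via <emmintrin.h>; the helper name is
   ours.  */
#if 0
#include <emmintrin.h>

static __m128i
add_words (__m128i a, __m128i b)
{
  return _mm_add_epi16 (a, b);	/* expands to __builtin_ia32_paddw128 */
}
#endif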
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
25416 case VOID_FTYPE_VOID:
25417 emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
25426 case UINT64_FTYPE_VOID:
25427 case UNSIGNED_FTYPE_VOID:
25428 case UINT16_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
25433 case UINT64_FTYPE_PUNSIGNED:
25434 case V2DI_FTYPE_PV2DI:
25435 case V32QI_FTYPE_PCCHAR:
25436 case V16QI_FTYPE_PCCHAR:
25437 case V8SF_FTYPE_PCV4SF:
25438 case V8SF_FTYPE_PCFLOAT:
25439 case V4SF_FTYPE_PCFLOAT:
25440 case V4DF_FTYPE_PCV2DF:
25441 case V4DF_FTYPE_PCDOUBLE:
25442 case V2DF_FTYPE_PCDOUBLE:
25443 case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
25448 case VOID_FTYPE_PV2SF_V4SF:
25449 case VOID_FTYPE_PV4DI_V4DI:
25450 case VOID_FTYPE_PV2DI_V2DI:
25451 case VOID_FTYPE_PCHAR_V32QI:
25452 case VOID_FTYPE_PCHAR_V16QI:
25453 case VOID_FTYPE_PFLOAT_V8SF:
25454 case VOID_FTYPE_PFLOAT_V4SF:
25455 case VOID_FTYPE_PDOUBLE_V4DF:
25456 case VOID_FTYPE_PDOUBLE_V2DF:
25457 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25458 case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
25461 /* Reserve memory operand for target. */
25462 memory = ARRAY_SIZE (args);
      break;
25464 case V4SF_FTYPE_V4SF_PCV2SF:
25465 case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
25470 case V8SF_FTYPE_PCV8SF_V8SF:
25471 case V4DF_FTYPE_PCV4DF_V4DF:
25472 case V4SF_FTYPE_PCV4SF_V4SF:
25473 case V2DF_FTYPE_PCV2DF_V2DF:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
25478 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25479 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25480 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25481 case VOID_FTYPE_PV2DF_V2DF_V2DF:
      nargs = 2;
      klass = store;
25484 /* Reserve memory operand for target. */
25485 memory = ARRAY_SIZE (args);
      break;
25487 case VOID_FTYPE_UINT_UINT_UINT:
25488 case VOID_FTYPE_UINT64_UINT_UINT:
25489 case UCHAR_FTYPE_UINT_UINT_UINT:
25490 case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
25493 memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }
25500 gcc_assert (nargs <= ARRAY_SIZE (args));
  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }
  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
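
/* Illustrative sketch (not part of this file): the "special" builtins are
   the load/store forms handled above; for instance the unaligned SSE load
   from <xmmintrin.h> takes the klass == load path with a memory operand.
   The helper name is ours.  */
#if 0
#include <xmmintrin.h>

static __m128
load_unaligned (const float *p)
{
  return _mm_loadu_ps (p);	/* expands to __builtin_ia32_loadups */
}
#endif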
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */
static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
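
/* Illustrative sketch (not part of this file): IX86_BUILTIN_VEC_INIT_V4HI
   is what <mmintrin.h>'s _mm_set_pi16 funnels into, giving the compiler
   license to build the MMX vector without language-level syntax.  The
   helper name is ours.  */
#if 0
#include <mmintrin.h>

static __m64
make_v4hi (short a, short b, short c, short d)
{
  return _mm_set_pi16 (d, c, b, a);	/* element 0 is the last argument */
}
#endif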
25646 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25647 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25648 had a language-level syntax for referencing vector elements. */
static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
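
/* Illustrative sketch (not part of this file): IX86_BUILTIN_VEC_EXT_V8HI
   backs <emmintrin.h>'s _mm_extract_epi16; the selector must be a constant
   so that get_element_number can range-check it.  The helper name is
   ours.  */
#if 0
#include <emmintrin.h>

static int
first_word (__m128i v)
{
  return _mm_extract_epi16 (v, 0);
}
#endif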
25678 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25679 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25680 a language-level syntax for referencing vector elements. */
static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
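
/* Illustrative sketch (not part of this file): IX86_BUILTIN_VEC_SET_V8HI
   backs _mm_insert_epi16, which returns a modified copy rather than
   changing its operand -- matching the copy made above.  The helper name
   is ours.  */
#if 0
#include <emmintrin.h>

static __m128i
set_first_word (__m128i v, int x)
{
  return _mm_insert_epi16 (v, x, 0);
}
#endif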
25717 /* Expand an expression EXP that calls a built-in function,
25718 with result going to TARGET if that's convenient
25719 (and in mode MODE if that's convenient).
25720 SUBTARGET may be used as the target for computing one of EXP's operands.
25721 IGNORE is nonzero if the value is to be ignored. */
static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
25730 enum insn_code icode;
25731 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25732 tree arg0, arg1, arg2;
25733 rtx op0, op1, op2, pat;
25734 enum machine_mode mode0, mode1, mode2;
25735 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25737 /* Determine whether the builtin function is available under the current ISA.
25738 Originally the builtin was not created if it wasn't applicable to the
25739 current ISA based on the command line switches. With function specific
25740 options, we need to check in the context of the function making the call
25741 whether it is supported. */
25742 if (ix86_builtins_isa[fcode].isa
25743 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, NULL, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }

  switch (fcode)
    {
25761 case IX86_BUILTIN_MASKMOVQ:
25762 case IX86_BUILTIN_MASKMOVDQU:
25763 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25764 ? CODE_FOR_mmx_maskmovq
25765 : CODE_FOR_sse2_maskmovdqu);
25766 /* Note the arg order is different from the operand order. */
25767 arg1 = CALL_EXPR_ARG (exp, 0);
25768 arg2 = CALL_EXPR_ARG (exp, 1);
25769 arg0 = CALL_EXPR_ARG (exp, 2);
25770 op0 = expand_normal (arg0);
25771 op1 = expand_normal (arg1);
25772 op2 = expand_normal (arg2);
25773 mode0 = insn_data[icode].operand[0].mode;
25774 mode1 = insn_data[icode].operand[1].mode;
25775 mode2 = insn_data[icode].operand[2].mode;
25777 op0 = force_reg (Pmode, op0);
25778 op0 = gen_rtx_MEM (mode1, op0);
25780 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25781 op0 = copy_to_mode_reg (mode0, op0);
25782 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25783 op1 = copy_to_mode_reg (mode1, op1);
25784 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25785 op2 = copy_to_mode_reg (mode2, op2);
25786 pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
25792 case IX86_BUILTIN_LDMXCSR:
25793 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25794 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25795 emit_move_insn (target, op0);
25796 emit_insn (gen_sse_ldmxcsr (target));
      return 0;
25799 case IX86_BUILTIN_STMXCSR:
25800 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25801 emit_insn (gen_sse_stmxcsr (target));
25802 return copy_to_mode_reg (SImode, target);
25804 case IX86_BUILTIN_CLFLUSH:
25805 arg0 = CALL_EXPR_ARG (exp, 0);
25806 op0 = expand_normal (arg0);
25807 icode = CODE_FOR_sse2_clflush;
25808 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25809 op0 = copy_to_mode_reg (Pmode, op0);
25811 emit_insn (gen_sse2_clflush (op0));
      return 0;
25814 case IX86_BUILTIN_MONITOR:
25815 arg0 = CALL_EXPR_ARG (exp, 0);
25816 arg1 = CALL_EXPR_ARG (exp, 1);
25817 arg2 = CALL_EXPR_ARG (exp, 2);
25818 op0 = expand_normal (arg0);
25819 op1 = expand_normal (arg1);
25820 op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;
25830 case IX86_BUILTIN_MWAIT:
25831 arg0 = CALL_EXPR_ARG (exp, 0);
25832 arg1 = CALL_EXPR_ARG (exp, 1);
25833 op0 = expand_normal (arg0);
25834 op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
25842 case IX86_BUILTIN_VEC_INIT_V2SI:
25843 case IX86_BUILTIN_VEC_INIT_V4HI:
25844 case IX86_BUILTIN_VEC_INIT_V8QI:
25845 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25847 case IX86_BUILTIN_VEC_EXT_V2DF:
25848 case IX86_BUILTIN_VEC_EXT_V2DI:
25849 case IX86_BUILTIN_VEC_EXT_V4SF:
25850 case IX86_BUILTIN_VEC_EXT_V4SI:
25851 case IX86_BUILTIN_VEC_EXT_V8HI:
25852 case IX86_BUILTIN_VEC_EXT_V2SI:
25853 case IX86_BUILTIN_VEC_EXT_V4HI:
25854 case IX86_BUILTIN_VEC_EXT_V16QI:
25855 return ix86_expand_vec_ext_builtin (exp, target);
25857 case IX86_BUILTIN_VEC_SET_V2DI:
25858 case IX86_BUILTIN_VEC_SET_V4SF:
25859 case IX86_BUILTIN_VEC_SET_V4SI:
25860 case IX86_BUILTIN_VEC_SET_V8HI:
25861 case IX86_BUILTIN_VEC_SET_V4HI:
25862 case IX86_BUILTIN_VEC_SET_V16QI:
25863 return ix86_expand_vec_set_builtin (exp);
25865 case IX86_BUILTIN_VEC_PERM_V2DF:
25866 case IX86_BUILTIN_VEC_PERM_V4SF:
25867 case IX86_BUILTIN_VEC_PERM_V2DI:
25868 case IX86_BUILTIN_VEC_PERM_V4SI:
25869 case IX86_BUILTIN_VEC_PERM_V8HI:
25870 case IX86_BUILTIN_VEC_PERM_V16QI:
25871 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25872 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25873 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25874 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25875 case IX86_BUILTIN_VEC_PERM_V4DF:
25876 case IX86_BUILTIN_VEC_PERM_V8SF:
25877 return ix86_expand_vec_perm_builtin (exp);
25879 case IX86_BUILTIN_INFQ:
25880 case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
25897 case IX86_BUILTIN_LLWPCB:
25898 arg0 = CALL_EXPR_ARG (exp, 0);
25899 op0 = expand_normal (arg0);
25900 icode = CODE_FOR_lwp_llwpcb;
25901 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25902 op0 = copy_to_mode_reg (Pmode, op0);
25903 emit_insn (gen_lwp_llwpcb (op0));
      return 0;
    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE2)
	    /* Emit a normal call if SSE2 isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}
25939 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25940 if (d->code == fcode)
25941 return ix86_expand_sse_comi (d, exp, target);
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);
25955 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25956 if (d->code == fcode)
25957 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25958 (enum ix86_builtin_func_type)
25959 d->flag, d->comparison);
  gcc_unreachable ();
}
25964 /* Returns a function decl for a vectorized version of the builtin function
25965 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25966 if it is not available. */
static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25976 if (TREE_CODE (type_out) != VECTOR_TYPE
25977 || TREE_CODE (type_in) != VECTOR_TYPE
25978 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

25981 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25982 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25983 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25984 in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
25988 case BUILT_IN_SQRT:
25989 if (out_mode == DFmode && out_n == 2
25990 && in_mode == DFmode && in_n == 2)
25991 return ix86_builtins[IX86_BUILTIN_SQRTPD];
      break;

25994 case BUILT_IN_SQRTF:
25995 if (out_mode == SFmode && out_n == 4
25996 && in_mode == SFmode && in_n == 4)
25997 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
      break;

26000 case BUILT_IN_LRINT:
26001 if (out_mode == SImode && out_n == 4
26002 && in_mode == DFmode && in_n == 2)
26003 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
      break;

26006 case BUILT_IN_LRINTF:
26007 if (out_mode == SImode && out_n == 4
26008 && in_mode == SFmode && in_n == 4)
26009 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      break;

26012 case BUILT_IN_COPYSIGN:
26013 if (out_mode == DFmode && out_n == 2
26014 && in_mode == DFmode && in_n == 2)
26015 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
      break;

26018 case BUILT_IN_COPYSIGNF:
26019 if (out_mode == SFmode && out_n == 4
26020 && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
      break;

    default:
      break;
    }

26028 /* Dispatch to a handler for a vectorization library. */
26029 if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
26036 /* Handler for an SVML-style interface to
26037 a library with vectorized intrinsics. */
static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  const char *bname;
  int arity;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;
26053 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26054 n = TYPE_VECTOR_SUBPARTS (type_out);
26055 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26056 in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
26069 case BUILT_IN_ATAN:
26070 case BUILT_IN_ATAN2:
26071 case BUILT_IN_ATANH:
26072 case BUILT_IN_CBRT:
26073 case BUILT_IN_SINH:
    case BUILT_IN_SIN:
26075 case BUILT_IN_ASINH:
26076 case BUILT_IN_ASIN:
26077 case BUILT_IN_COSH:
    case BUILT_IN_COS:
26079 case BUILT_IN_ACOSH:
26080 case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

26085 case BUILT_IN_EXPF:
26086 case BUILT_IN_LOGF:
26087 case BUILT_IN_LOG10F:
26088 case BUILT_IN_POWF:
26089 case BUILT_IN_TANHF:
26090 case BUILT_IN_TANF:
26091 case BUILT_IN_ATANF:
26092 case BUILT_IN_ATAN2F:
26093 case BUILT_IN_ATANHF:
26094 case BUILT_IN_CBRTF:
26095 case BUILT_IN_SINHF:
26096 case BUILT_IN_SINF:
26097 case BUILT_IN_ASINHF:
26098 case BUILT_IN_ASINF:
26099 case BUILT_IN_COSHF:
26100 case BUILT_IN_COSF:
26101 case BUILT_IN_ACOSHF:
26102 case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

26111 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26113 if (fn == BUILT_IN_LOGF)
26114 strcpy (name, "vmlsLn4");
26115 else if (fn == BUILT_IN_LOG)
26116 strcpy (name, "vmldLn2");
26119 sprintf (name, "vmls%s", bname+10);
26120 name[strlen (name)-1] = '4';
26123 sprintf (name, "vmld%s2", bname+10);
26125 /* Convert to uppercase. */
  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26138 /* Build a function declaration for the vectorized function. */
26139 new_fndecl = build_decl (BUILTINS_LOCATION,
26140 FUNCTION_DECL, get_identifier (name), fntype);
26141 TREE_PUBLIC (new_fndecl) = 1;
26142 DECL_EXTERNAL (new_fndecl) = 1;
26143 DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
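
/* Worked example of the mangling above (an illustration, not taken from
   SVML's own headers): for BUILT_IN_SINF with n == 4, bname is
   "__builtin_sinf", so the sprintf yields "vmlssinf", the trailing
   character becomes '4' ("vmlssin4"), and the uppercase fixup produces
   "vmlsSin4".  BUILT_IN_SIN analogously becomes "vmldSin2".  */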
26149 /* Handler for an ACML-style interface to
26150 a library with vectorized intrinsics. */
static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  const char *bname;
  int arity;
  enum machine_mode el_mode, in_mode;
  int n, in_n;
26162 /* The ACML is 64bits only and suitable for unsafe math only as
26163 it does not correctly support parts of IEEE with the required
26164 precision such as denormals. */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;
26169 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26170 n = TYPE_VECTOR_SUBPARTS (type_out);
26171 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26172 in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

26192 case BUILT_IN_SINF:
26193 case BUILT_IN_COSF:
26194 case BUILT_IN_EXPF:
26195 case BUILT_IN_POWF:
26196 case BUILT_IN_LOGF:
26197 case BUILT_IN_LOG2F:
26198 case BUILT_IN_LOG10F:
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

26210 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26211 sprintf (name + 7, "%s", bname+10);
26214 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26215 args = TREE_CHAIN (args))
26219 fntype = build_function_type_list (type_out, type_in, NULL);
26221 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26223 /* Build a function declaration for the vectorized function. */
26224 new_fndecl = build_decl (BUILTINS_LOCATION,
26225 FUNCTION_DECL, get_identifier (name), fntype);
26226 TREE_PUBLIC (new_fndecl) = 1;
26227 DECL_EXTERNAL (new_fndecl) = 1;
26228 DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
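
/* Worked example of the mangling above (illustrative): for BUILT_IN_SIN
   with n == 2, the "__vr.._" template becomes "__vrd2_", and appending
   bname+10 ("sin") yields "__vrd2_sin"; BUILT_IN_SINF with n == 4 gives
   "__vrs4_sinf".  These are the vector math entry points that ACML-style
   libraries export.  */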
26235 /* Returns a decl of a function that implements conversion of an integer vector
26236 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26237 are the types involved when converting according to CODE.
26238 Return NULL_TREE if it is not available. */
static tree
ix86_vectorize_builtin_conversion (unsigned int code,
				   tree dest_type, tree src_type)
{
  if (! TARGET_SSE2)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (src_type))
	{
	case V4SImode:
	  switch (TYPE_MODE (dest_type))
	    {
	    case V4SFmode:
	      return (TYPE_UNSIGNED (src_type)
		      ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
		      : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
	    case V4DFmode:
	      return (TYPE_UNSIGNED (src_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
	    default:
	      return NULL_TREE;
	    }
	  break;

	case V8SImode:
	  switch (TYPE_MODE (dest_type))
	    {
	    case V8SFmode:
	      return (TYPE_UNSIGNED (src_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
	    default:
	      return NULL_TREE;
	    }
	  break;

	default:
	  return NULL_TREE;
	}
      break;

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (dest_type))
	{
	case V4SImode:
	  switch (TYPE_MODE (src_type))
	    {
	    case V4SFmode:
	      return (TYPE_UNSIGNED (dest_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
	    case V4DFmode:
	      return (TYPE_UNSIGNED (dest_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
	    default:
	      return NULL_TREE;
	    }
	  break;

	case V8SImode:
	  switch (TYPE_MODE (src_type))
	    {
	    case V8SFmode:
	      return (TYPE_UNSIGNED (dest_type)
		      ? NULL_TREE
		      : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
	    default:
	      return NULL_TREE;
	    }
	  break;

	default:
	  return NULL_TREE;
	}
      break;

    default:
      return NULL_TREE;
    }

  return NULL_TREE;
}
26324 /* Returns a code for a target-specific builtin that implements
26325 reciprocal of the function, or NULL_TREE if not available. */
static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
26360 /* Helper for avx_vpermilps256_operand et al. This is also used by
26361 the expansion functions to turn the parallel back into a mask.
26362 The return value is 0 for no match and the imm8+1 for a match. */
int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
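
/* Worked example (illustrative, not part of the original): for V4SFmode,
   a PARALLEL selecting elements [1, 0, 3, 2] packs two bits per element,
   giving 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1 -- exactly the immediate a
   vpermilps instruction would use -- and the function returns 0xb1 + 1.  */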
26435 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26436 the expansion functions to turn the parallel back into a mask.
26437 The return value is 0 for no match and the imm8+1 for a match. */
int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
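
/* Worked example (illustrative, not part of the original): for V4DFmode,
   the lane-swapping PARALLEL [2, 3, 0, 1] has halves starting at elements
   2 and 0; dividing each by nelt2 == 2 gives digits 1 and 0, so the mask
   is 1<<0 | 0<<4 = 0x01 -- the same immediate _mm256_permute2f128_pd uses
   to swap the two 128-bit lanes -- and the return value is 0x02.  */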
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Implement TARGET_IRA_COVER_CLASSES.  If -mfpmath=sse, we prefer
   SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
   same.  */
static const reg_class_t *
i386_ira_cover_classes (void)
{
  static const reg_class_t sse_fpmath_classes[] = {
    GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
  };
  static const reg_class_t no_sse_fpmath_classes[] = {
    GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
  };

  return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode,
		       secondary_reload_info *sri ATTRIBUTE_UNUSED)
{
  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!in_p && mode == QImode && !TARGET_64BIT
      && (rclass == GENERAL_REGS
	  || rclass == LEGACY_REGS
	  || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
26762 /* If we are copying between general and FP registers, we need a memory
26763 location. The same is true for SSE and MMX registers.
26765 To optimize register_move_cost performance, allow inline variant.
26767 The macro can't work reliably when one of the CLASSES is class containing
26768 registers from multiple units (SSE, MMX, integer). We avoid this by never
26769 combining those units in single alternative in the machine description.
26770 Ensure that this constraint holds to avoid unexpected surprises.
26772 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26773 enforce these sanity checks. */
static bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}
bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
26860 /* Return the cost of moving data of mode M between a
26861 register and memory. A value of 2 is the default; this cost is
26862 relative to those in `REGISTER_MOVE_COST'.
26864 This function is used extensively by register_move_cost that is used to
26865 build tables at startup. Make it inline in this case.
26866 When IN is 2, return maximum of in and out move cost.
   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.
 */
static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	case SFmode: index = 0; break;
	case DFmode: index = 1; break;
	case XFmode: index = 2; break;
	default:     return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:  index = 0; break;
	case 8:  index = 1; break;
	case 16: index = 2; break;
	default: return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:  index = 0; break;
	case 8:  index = 1; break;
	default: return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  OImode move is available only when AVX is
	 enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  else if (VALID_DFP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
27182 /* Compute a (partial) cost for rtx X. Return true if the complete
27183 cost has been computed, and false if subexpressions should be
27184 scanned. In either case, *TOTAL contains the cost result. */
static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      break;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      break;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else
	    *total = cost->shift_var;
	}
      break;

    case MULT:
27302 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27304 /* ??? SSE scalar cost should be used here. */
27305 *total = cost->fmul;
27308 else if (X87_FLOAT_MODE_P (mode))
27310 *total = cost->fmul;
27313 else if (FLOAT_MODE_P (mode))
27315 /* ??? SSE vector cost should be used here. */
27316 *total = cost->fmul;
27321 rtx op0 = XEXP (x, 0);
27322 rtx op1 = XEXP (x, 1);
27324 if (CONST_INT_P (XEXP (x, 1)))
27326 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27327 for (nbits = 0; value != 0; value &= value - 1)
27331 /* This is arbitrary. */
27334 /* Compute costs correctly for widening multiplication. */
27335 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27336 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27337 == GET_MODE_SIZE (mode))
27339 int is_mulwiden = 0;
27340 enum machine_mode inner_mode = GET_MODE (op0);
27342 if (GET_CODE (op0) == GET_CODE (op1))
27343 is_mulwiden = 1, op1 = XEXP (op1, 0);
27344 else if (CONST_INT_P (op1))
27346 if (GET_CODE (op0) == SIGN_EXTEND)
27347 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27350 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27354 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27357 *total = (cost->mult_init[MODE_INDEX (mode)]
27358 + nbits * cost->mult_bit
27359 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
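/* Illustrative example of the formula above: an SImode multiply by the
   constant 5 (binary 101, so nbits == 2) is costed as
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the recursive
   costs of the two operands.  */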
27368 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27369 /* ??? SSE cost should be used here. */
27370 *total = cost->fdiv;
27371 else if (X87_FLOAT_MODE_P (mode))
27372 *total = cost->fdiv;
27373 else if (FLOAT_MODE_P (mode))
27374 /* ??? SSE vector cost should be used here. */
27375 *total = cost->fdiv;
27377 *total = cost->divide[MODE_INDEX (mode)];
27381 if (GET_MODE_CLASS (mode) == MODE_INT
27382 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27384 if (GET_CODE (XEXP (x, 0)) == PLUS
27385 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27386 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27387 && CONSTANT_P (XEXP (x, 1)))
27389 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27390 if (val == 2 || val == 4 || val == 8)
27392 *total = cost->lea;
27393 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27394 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27395 outer_code, speed);
27396 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27400 else if (GET_CODE (XEXP (x, 0)) == MULT
27401 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27403 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27404 if (val == 2 || val == 4 || val == 8)
27406 *total = cost->lea;
27407 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27408 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27412 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27414 *total = cost->lea;
27415 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27416 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27417 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27424 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27426 /* ??? SSE cost should be used here. */
27427 *total = cost->fadd;
27430 else if (X87_FLOAT_MODE_P (mode))
27432 *total = cost->fadd;
27435 else if (FLOAT_MODE_P (mode))
27437 /* ??? SSE vector cost should be used here. */
27438 *total = cost->fadd;
27446 if (!TARGET_64BIT && mode == DImode)
27448 *total = (cost->add * 2
27449 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27450 << (GET_MODE (XEXP (x, 0)) != DImode))
27451 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27452 << (GET_MODE (XEXP (x, 1)) != DImode)));
27458 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27460 /* ??? SSE cost should be used here. */
27461 *total = cost->fchs;
27464 else if (X87_FLOAT_MODE_P (mode))
27466 *total = cost->fchs;
27469 else if (FLOAT_MODE_P (mode))
27471 /* ??? SSE vector cost should be used here. */
27472 *total = cost->fchs;
27478 if (!TARGET_64BIT && mode == DImode)
27479 *total = cost->add * 2;
27481 *total = cost->add;
27485 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27486 && XEXP (XEXP (x, 0), 1) == const1_rtx
27487 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27488 && XEXP (x, 1) == const0_rtx)
27490 /* This kind of construct is implemented using test[bwl].
27491 Treat it as if we had an AND. */
27492 *total = (cost->add
27493 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27494 + rtx_cost (const1_rtx, outer_code, speed));
27500 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27505 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27506 /* ??? SSE cost should be used here. */
27507 *total = cost->fabs;
27508 else if (X87_FLOAT_MODE_P (mode))
27509 *total = cost->fabs;
27510 else if (FLOAT_MODE_P (mode))
27511 /* ??? SSE vector cost should be used here. */
27512 *total = cost->fabs;
27516 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27517 /* ??? SSE cost should be used here. */
27518 *total = cost->fsqrt;
27519 else if (X87_FLOAT_MODE_P (mode))
27520 *total = cost->fsqrt;
27521 else if (FLOAT_MODE_P (mode))
27522 /* ??? SSE vector cost should be used here. */
27523 *total = cost->fsqrt;
27527 if (XINT (x, 1) == UNSPEC_TP)
27534 case VEC_DUPLICATE:
27535 /* ??? Assume all of these vector manipulation patterns are
27536 recognizable.  In which case they all pretty much have the same cost.  */
27538 *total = COSTS_N_INSNS (1);
27548 static int current_machopic_label_num;
27550 /* Given a symbol name and its associated stub, write out the
27551 definition of the stub. */
27554 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27556 unsigned int length;
27557 char *binder_name, *symbol_name, lazy_ptr_name[32];
27558 int label = ++current_machopic_label_num;
27560 /* For 64-bit we shouldn't get here. */
27561 gcc_assert (!TARGET_64BIT);
27563 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27564 symb = targetm.strip_name_encoding (symb);
27566 length = strlen (stub);
27567 binder_name = XALLOCAVEC (char, length + 32);
27568 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27570 length = strlen (symb);
27571 symbol_name = XALLOCAVEC (char, length + 32);
27572 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27574 sprintf (lazy_ptr_name, "L%d$lz", label);
27577 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27579 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27581 fprintf (file, "%s:\n", stub);
27582 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27586 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27587 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27588 fprintf (file, "\tjmp\t*%%edx\n");
27591 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27593 fprintf (file, "%s:\n", binder_name);
27597 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27598 fputs ("\tpushl\t%eax\n", file);
27601 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27603 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27605 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27606 fprintf (file, "%s:\n", lazy_ptr_name);
27607 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27608 fprintf (file, ASM_LONG "%s\n", binder_name);
27610 #endif /* TARGET_MACHO */
27612 /* Order the registers for the register allocator.  */
27615 x86_order_regs_for_local_alloc (void)
27620 /* First allocate the local general purpose registers. */
27621 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27622 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27623 reg_alloc_order [pos++] = i;
27625 /* Global general purpose registers. */
27626 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27627 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27628 reg_alloc_order [pos++] = i;
27630 /* x87 registers come first in case we are doing FP math using them.  */
27632 if (!TARGET_SSE_MATH)
27633 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27634 reg_alloc_order [pos++] = i;
27636 /* SSE registers. */
27637 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27638 reg_alloc_order [pos++] = i;
27639 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27640 reg_alloc_order [pos++] = i;
27642 /* x87 registers. */
27643 if (TARGET_SSE_MATH)
27644 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27645 reg_alloc_order [pos++] = i;
27647 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27648 reg_alloc_order [pos++] = i;
27650 /* Initialize the rest of the array, as we do not allocate some registers at all.  */
27652 while (pos < FIRST_PSEUDO_REGISTER)
27653 reg_alloc_order [pos++] = 0;
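/* The resulting order is therefore roughly: call-clobbered GPRs,
   call-saved GPRs, then the x87 stack either before the SSE registers
   (when doing x87 FP math) or after them, and the MMX registers last;
   the remaining slots are zero-filled since those registers are never
   allocated.  */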
27656 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27657 struct attribute_spec.handler. */
27659 ix86_handle_abi_attribute (tree *node, tree name,
27660 tree args ATTRIBUTE_UNUSED,
27661 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27663 if (TREE_CODE (*node) != FUNCTION_TYPE
27664 && TREE_CODE (*node) != METHOD_TYPE
27665 && TREE_CODE (*node) != FIELD_DECL
27666 && TREE_CODE (*node) != TYPE_DECL)
27668 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27670 *no_add_attrs = true;
27675 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27677 *no_add_attrs = true;
27681 /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
27682 if (is_attribute_p ("ms_abi", name))
27684 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27686 error ("ms_abi and sysv_abi attributes are not compatible");
27691 else if (is_attribute_p ("sysv_abi", name))
27693 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27695 error ("ms_abi and sysv_abi attributes are not compatible");
27704 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27705 struct attribute_spec.handler. */
27707 ix86_handle_struct_attribute (tree *node, tree name,
27708 tree args ATTRIBUTE_UNUSED,
27709 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27712 if (DECL_P (*node))
27714 if (TREE_CODE (*node) == TYPE_DECL)
27715 type = &TREE_TYPE (*node);
27720 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27721 || TREE_CODE (*type) == UNION_TYPE)))
27723 warning (OPT_Wattributes, "%qE attribute ignored",
27725 *no_add_attrs = true;
27728 else if ((is_attribute_p ("ms_struct", name)
27729 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27730 || ((is_attribute_p ("gcc_struct", name)
27731 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27733 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27735 *no_add_attrs = true;
27742 ix86_handle_fndecl_attribute (tree *node, tree name,
27743 tree args ATTRIBUTE_UNUSED,
27744 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27746 if (TREE_CODE (*node) != FUNCTION_DECL)
27748 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27750 *no_add_attrs = true;
27756 ix86_ms_bitfield_layout_p (const_tree record_type)
27758 return ((TARGET_MS_BITFIELD_LAYOUT
27759 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27760 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27763 /* Returns an expression indicating where the this parameter is
27764 located on entry to the FUNCTION. */
27767 x86_this_parameter (tree function)
27769 tree type = TREE_TYPE (function);
27770 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27775 const int *parm_regs;
27777 if (ix86_function_type_abi (type) == MS_ABI)
27778 parm_regs = x86_64_ms_abi_int_parameter_registers;
27780 parm_regs = x86_64_int_parameter_registers;
27781 return gen_rtx_REG (DImode, parm_regs[aggr]);
27784 nregs = ix86_function_regparm (type, function);
27786 if (nregs > 0 && !stdarg_p (type))
27790 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27791 regno = aggr ? DX_REG : CX_REG;
27792 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27796 return gen_rtx_MEM (SImode,
27797 plus_constant (stack_pointer_rtx, 4));
27806 return gen_rtx_MEM (SImode,
27807 plus_constant (stack_pointer_rtx, 4));
27810 return gen_rtx_REG (SImode, regno);
27813 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27816 /* Determine whether x86_output_mi_thunk can succeed. */
27819 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27820 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27821 HOST_WIDE_INT vcall_offset, const_tree function)
27823 /* 64-bit can handle anything. */
27827 /* For 32-bit, everything's fine if we have one free register. */
27828 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27831 /* Need a free register for vcall_offset. */
27835 /* Need a free register for GOT references. */
27836 if (flag_pic && !targetm.binds_local_p (function))
27839 /* Otherwise ok. */
27843 /* Output the assembler code for a thunk function. THUNK_DECL is the
27844 declaration for the thunk function itself, FUNCTION is the decl for
27845 the target function. DELTA is an immediate constant offset to be
27846 added to THIS. If VCALL_OFFSET is nonzero, the word at
27847 *(*this + vcall_offset) should be added to THIS. */
27850 x86_output_mi_thunk (FILE *file,
27851 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27852 HOST_WIDE_INT vcall_offset, tree function)
27855 rtx this_param = x86_this_parameter (function);
27858 /* Make sure unwind info is emitted for the thunk if needed. */
27859 final_start_function (emit_barrier (), file, 1);
27861 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27862 pull it in now and let DELTA benefit. */
27863 if (REG_P (this_param))
27864 this_reg = this_param;
27865 else if (vcall_offset)
27867 /* Put the this parameter into %eax. */
27868 xops[0] = this_param;
27869 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27870 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27873 this_reg = NULL_RTX;
27875 /* Adjust the this parameter by a fixed constant. */
27878 xops[0] = GEN_INT (delta);
27879 xops[1] = this_reg ? this_reg : this_param;
27882 if (!x86_64_general_operand (xops[0], DImode))
27884 tmp = gen_rtx_REG (DImode, R10_REG);
27886 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27888 xops[1] = this_param;
27890 if (x86_maybe_negate_const_int (&xops[0], DImode))
27891 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27893 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27895 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27896 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27898 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27901 /* Adjust the this parameter by a value stored in the vtable. */
27905 tmp = gen_rtx_REG (DImode, R10_REG);
27908 int tmp_regno = CX_REG;
27909 if (lookup_attribute ("fastcall",
27910 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27911 || lookup_attribute ("thiscall",
27912 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27913 tmp_regno = AX_REG;
27914 tmp = gen_rtx_REG (SImode, tmp_regno);
27917 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27919 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27921 /* Adjust the this parameter. */
27922 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27923 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27925 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27926 xops[0] = GEN_INT (vcall_offset);
27928 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27929 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27931 xops[1] = this_reg;
27932 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27935 /* If necessary, drop THIS back to its stack slot. */
27936 if (this_reg && this_reg != this_param)
27938 xops[0] = this_reg;
27939 xops[1] = this_param;
27940 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27943 xops[0] = XEXP (DECL_RTL (function), 0);
27946 if (!flag_pic || targetm.binds_local_p (function))
27947 output_asm_insn ("jmp\t%P0", xops);
27948 /* All thunks should be in the same object as their target,
27949 and thus binds_local_p should be true. */
27950 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27951 gcc_unreachable ();
27954 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27955 tmp = gen_rtx_CONST (Pmode, tmp);
27956 tmp = gen_rtx_MEM (QImode, tmp);
27958 output_asm_insn ("jmp\t%A0", xops);
27963 if (!flag_pic || targetm.binds_local_p (function))
27964 output_asm_insn ("jmp\t%P0", xops);
27969 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27970 if (TARGET_MACHO_BRANCH_ISLANDS)
27971 sym_ref = (gen_rtx_SYMBOL_REF
27973 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27974 tmp = gen_rtx_MEM (QImode, sym_ref);
27976 output_asm_insn ("jmp\t%0", xops);
27979 #endif /* TARGET_MACHO */
27981 tmp = gen_rtx_REG (SImode, CX_REG);
27982 output_set_got (tmp, NULL_RTX);
27985 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27986 output_asm_insn ("jmp\t{*}%1", xops);
27989 final_end_function ();
27993 x86_file_start (void)
27995 default_file_start ();
27997 darwin_file_start ();
27999 if (X86_FILE_START_VERSION_DIRECTIVE)
28000 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28001 if (X86_FILE_START_FLTUSED)
28002 fputs ("\t.global\t__fltused\n", asm_out_file);
28003 if (ix86_asm_dialect == ASM_INTEL)
28004 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28008 x86_field_alignment (tree field, int computed)
28010 enum machine_mode mode;
28011 tree type = TREE_TYPE (field);
28013 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28015 mode = TYPE_MODE (strip_array_types (type));
28016 if (mode == DFmode || mode == DCmode
28017 || GET_MODE_CLASS (mode) == MODE_INT
28018 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28019 return MIN (32, computed);
28023 /* Output assembler code to FILE to increment profiler label # LABELNO
28024 for profiling a function entry. */
28026 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28028 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28033 #ifndef NO_PROFILE_COUNTERS
28034 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28037 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28038 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28040 fprintf (file, "\tcall\t%s\n", mcount_name);
28044 #ifndef NO_PROFILE_COUNTERS
28045 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28048 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28052 #ifndef NO_PROFILE_COUNTERS
28053 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28056 fprintf (file, "\tcall\t%s\n", mcount_name);
28060 /* We don't have exact information about the insn sizes, but we may assume
28061 quite safely that we are informed about all 1-byte insns and memory
28062 address sizes.  This is enough to eliminate unnecessary padding in most cases.  */
28066 min_insn_size (rtx insn)
28070 if (!INSN_P (insn) || !active_insn_p (insn))
28073 /* Discard alignments we've emitted, and jump table data.  */
28074 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28075 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28077 if (JUMP_TABLE_DATA_P (insn))
28080 /* Important case - calls are always 5 bytes.
28081 It is common to have many calls in a row.  */
28083 && symbolic_reference_mentioned_p (PATTERN (insn))
28084 && !SIBLING_CALL_P (insn))
28086 len = get_attr_length (insn);
28090 /* For normal instructions we rely on get_attr_length being exact,
28091 with a few exceptions. */
28092 if (!JUMP_P (insn))
28094 enum attr_type type = get_attr_type (insn);
28099 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28100 || asm_noperands (PATTERN (insn)) >= 0)
28107 /* Otherwise trust get_attr_length. */
28111 l = get_attr_length_address (insn);
28112 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28121 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28123 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window.  */
28127 ix86_avoid_jump_mispredicts (void)
28129 rtx insn, start = get_insns ();
28130 int nbytes = 0, njumps = 0;
28133 /* Look for all minimal intervals of instructions containing 4 jumps.
28134 The intervals are bounded by START and INSN. NBYTES is the total
28135 size of instructions in the interval including INSN and not including
28136 START.  When NBYTES is smaller than 16 bytes, it is possible
28137 that START and INSN end up in the same 16-byte page.
28139 The smallest offset in the page at which INSN can start is the case where
28140 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
28141 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).  */
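/* Illustrative numbers plugged into the formula above: with NBYTES == 12
   and a 2-byte INSN, INSN can start as early as offset 10 in the page,
   and the emitted p2align uses a max skip of 15 - 12 + 2 == 5 bytes.  */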
28143 for (insn = start; insn; insn = NEXT_INSN (insn))
28147 if (LABEL_P (insn))
28149 int align = label_to_alignment (insn);
28150 int max_skip = label_to_max_skip (insn);
28154 /* If align > 3, only up to 16 - max_skip - 1 bytes can already be
28155 in the current 16-byte page, because otherwise
28156 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28157 bytes to reach a 16-byte boundary.  */
28159 || (align <= 3 && max_skip != (1 << align) - 1))
28162 fprintf (dump_file, "Label %i with max_skip %i\n",
28163 INSN_UID (insn), max_skip);
28166 while (nbytes + max_skip >= 16)
28168 start = NEXT_INSN (start);
28169 if ((JUMP_P (start)
28170 && GET_CODE (PATTERN (start)) != ADDR_VEC
28171 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28173 njumps--, isjump = 1;
28176 nbytes -= min_insn_size (start);
28182 min_size = min_insn_size (insn);
28183 nbytes += min_size;
28185 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28186 INSN_UID (insn), min_size);
28188 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28189 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28197 start = NEXT_INSN (start);
28198 if ((JUMP_P (start)
28199 && GET_CODE (PATTERN (start)) != ADDR_VEC
28200 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28202 njumps--, isjump = 1;
28205 nbytes -= min_insn_size (start);
28207 gcc_assert (njumps >= 0);
28209 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28210 INSN_UID (start), INSN_UID (insn), nbytes);
28212 if (njumps == 3 && isjump && nbytes < 16)
28214 int padsize = 15 - nbytes + min_insn_size (insn);
28217 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28218 INSN_UID (insn), padsize);
28219 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
28225 /* The AMD Athlon works faster
28226 when RET is not the destination of a conditional jump and is not directly
28227 preceded by another jump instruction.  We avoid the penalty by inserting a
28228 NOP just before the RET instruction in such cases.  */
28230 ix86_pad_returns (void)
28235 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28237 basic_block bb = e->src;
28238 rtx ret = BB_END (bb);
28240 bool replace = false;
28242 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28243 || optimize_bb_for_size_p (bb))
28245 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28246 if (active_insn_p (prev) || LABEL_P (prev))
28248 if (prev && LABEL_P (prev))
28253 FOR_EACH_EDGE (e, ei, bb->preds)
28254 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28255 && !(e->flags & EDGE_FALLTHRU))
28260 prev = prev_active_insn (ret);
28262 && ((JUMP_P (prev) && any_condjump_p (prev))
28265 /* Empty functions get a branch mispredict even when the jump destination
28266 is not visible to us. */
28267 if (!prev && !optimize_function_for_size_p (cfun))
28272 emit_jump_insn_before (gen_return_internal_long (), ret);
28278 /* Count the minimum number of instructions in BB. Return 4 if the
28279 number of instructions >= 4. */
28282 ix86_count_insn_bb (basic_block bb)
28285 int insn_count = 0;
28287 /* Count the number of instructions in this block.  Return 4 if the number
28288 of instructions >= 4. */
28289 FOR_BB_INSNS (bb, insn)
28291 /* This only happens in exit blocks.  */
28293 && GET_CODE (PATTERN (insn)) == RETURN)
28296 if (NONDEBUG_INSN_P (insn)
28297 && GET_CODE (PATTERN (insn)) != USE
28298 && GET_CODE (PATTERN (insn)) != CLOBBER)
28301 if (insn_count >= 4)
28310 /* Count the minimum number of instructions in the code path through BB.
28311 Return 4 if the number of instructions >= 4. */
28314 ix86_count_insn (basic_block bb)
28318 int min_prev_count;
28320 /* Only bother counting instructions along paths with no
28321 more than 2 basic blocks between entry and exit. Given
28322 that BB has an edge to exit, determine if a predecessor
28323 of BB has an edge from entry. If so, compute the number
28324 of instructions in the predecessor block. If there
28325 happen to be multiple such blocks, compute the minimum. */
28326 min_prev_count = 4;
28327 FOR_EACH_EDGE (e, ei, bb->preds)
28330 edge_iterator prev_ei;
28332 if (e->src == ENTRY_BLOCK_PTR)
28334 min_prev_count = 0;
28337 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28339 if (prev_e->src == ENTRY_BLOCK_PTR)
28341 int count = ix86_count_insn_bb (e->src);
28342 if (count < min_prev_count)
28343 min_prev_count = count;
28349 if (min_prev_count < 4)
28350 min_prev_count += ix86_count_insn_bb (bb);
28352 return min_prev_count;
28355 /* Pad short functions to 4 instructions.  */
28358 ix86_pad_short_function (void)
28363 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28365 rtx ret = BB_END (e->src);
28366 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28368 int insn_count = ix86_count_insn (e->src);
28370 /* Pad short function. */
28371 if (insn_count < 4)
28375 /* Find epilogue. */
28378 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28379 insn = PREV_INSN (insn);
28384 /* Two NOPs are counted as one instruction. */
28385 insn_count = 2 * (4 - insn_count);
28386 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
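/* E.g. a function body with a single instruction gets
   gen_nops (GEN_INT (6)): six NOPs, counted as the three missing
   instructions.  */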
28392 /* Implement machine-specific optimizations.  We implement padding of returns
28393 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
28397 if (optimize && optimize_function_for_speed_p (cfun))
28399 if (TARGET_PAD_SHORT_FUNCTION)
28400 ix86_pad_short_function ();
28401 else if (TARGET_PAD_RETURNS)
28402 ix86_pad_returns ();
28403 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28404 if (TARGET_FOUR_JUMP_LIMIT)
28405 ix86_avoid_jump_mispredicts ();
28410 /* Return nonzero when a QImode register that must be represented via a REX prefix is used.  */
28413 x86_extended_QIreg_mentioned_p (rtx insn)
28416 extract_insn_cached (insn);
28417 for (i = 0; i < recog_data.n_operands; i++)
28418 if (REG_P (recog_data.operand[i])
28419 && REGNO (recog_data.operand[i]) > BX_REG)
28424 /* Return nonzero when P points to a register encoded via a REX prefix.
28425 Called via for_each_rtx. */
28427 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28429 unsigned int regno;
28432 regno = REGNO (*p);
28433 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28436 /* Return true when INSN mentions a register that must be encoded using a REX prefix.  */
28439 x86_extended_reg_mentioned_p (rtx insn)
28441 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28442 extended_reg_mentioned_1, NULL);
28445 /* If profitable, negate (without causing overflow) integer constant
28446 of mode MODE at location LOC. Return true in this case. */
28448 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28452 if (!CONST_INT_P (*loc))
28458 /* DImode x86_64 constants must fit in 32 bits. */
28459 gcc_assert (x86_64_immediate_operand (*loc, mode));
28470 gcc_unreachable ();
28473 /* Avoid overflows. */
28474 if (mode_signbit_p (mode, *loc))
28477 val = INTVAL (*loc);
28479 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
28480 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
28481 if ((val < 0 && val != -128)
28484 *loc = GEN_INT (-val);
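/* Illustrative effect: add $-4 becomes sub $4 (prettier and no larger),
   while add $-128 is left alone because -128 fits in a sign-extended
   8-bit immediate and +128 would not.  */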
28491 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28492 optabs would emit if we didn't have TFmode patterns. */
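/* A sketch of the strategy used below: a non-negative input is converted
   directly; a negative input (high bit set, too large for a signed
   conversion) is halved without losing the low bit via
   (in >> 1) | (in & 1), converted, and then doubled with an FP add,
   which rounds correctly.  */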
28495 x86_emit_floatuns (rtx operands[2])
28497 rtx neglab, donelab, i0, i1, f0, in, out;
28498 enum machine_mode mode, inmode;
28500 inmode = GET_MODE (operands[1]);
28501 gcc_assert (inmode == SImode || inmode == DImode);
28504 in = force_reg (inmode, operands[1]);
28505 mode = GET_MODE (out);
28506 neglab = gen_label_rtx ();
28507 donelab = gen_label_rtx ();
28508 f0 = gen_reg_rtx (mode);
28510 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28512 expand_float (out, in, 0);
28514 emit_jump_insn (gen_jump (donelab));
28517 emit_label (neglab);
28519 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28521 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28523 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28525 expand_float (f0, i0, 0);
28527 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28529 emit_label (donelab);
28532 /* AVX does not support 32-byte integer vector operations,
28533 thus the longest vector we are faced with is V16QImode. */
28534 #define MAX_VECT_LEN 16
28536 struct expand_vec_perm_d
28538 rtx target, op0, op1;
28539 unsigned char perm[MAX_VECT_LEN];
28540 enum machine_mode vmode;
28541 unsigned char nelt;
28545 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28546 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28548 /* Get a vector mode of the same size as the original but with elements
28549 twice as wide. This is only guaranteed to apply to integral vectors. */
28551 static inline enum machine_mode
28552 get_mode_wider_vector (enum machine_mode o)
28554 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28555 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28556 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28557 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28561 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28562 with all elements equal to VAR. Return true if successful. */
28565 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28566 rtx target, rtx val)
28589 /* First attempt to recognize VAL as-is. */
28590 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28591 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28592 if (recog_memoized (insn) < 0)
28595 /* If that fails, force VAL into a register. */
28598 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28599 seq = get_insns ();
28602 emit_insn_before (seq, insn);
28604 ok = recog_memoized (insn) >= 0;
28613 if (TARGET_SSE || TARGET_3DNOW_A)
28617 val = gen_lowpart (SImode, val);
28618 x = gen_rtx_TRUNCATE (HImode, val);
28619 x = gen_rtx_VEC_DUPLICATE (mode, x);
28620 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28633 struct expand_vec_perm_d dperm;
28637 memset (&dperm, 0, sizeof (dperm));
28638 dperm.target = target;
28639 dperm.vmode = mode;
28640 dperm.nelt = GET_MODE_NUNITS (mode);
28641 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28643 /* Extend to SImode using a paradoxical SUBREG. */
28644 tmp1 = gen_reg_rtx (SImode);
28645 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28647 /* Insert the SImode value as low element of a V4SImode vector. */
28648 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28649 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28651 ok = (expand_vec_perm_1 (&dperm)
28652 || expand_vec_perm_broadcast_1 (&dperm));
28664 /* Replicate the value once into the next wider mode and recurse. */
28666 enum machine_mode smode, wsmode, wvmode;
28669 smode = GET_MODE_INNER (mode);
28670 wvmode = get_mode_wider_vector (mode);
28671 wsmode = GET_MODE_INNER (wvmode);
28673 val = convert_modes (wsmode, smode, val, true);
28674 x = expand_simple_binop (wsmode, ASHIFT, val,
28675 GEN_INT (GET_MODE_BITSIZE (smode)),
28676 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28677 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28679 x = gen_lowpart (wvmode, target);
28680 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28688 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28689 rtx x = gen_reg_rtx (hvmode);
28691 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28694 x = gen_rtx_VEC_CONCAT (mode, x, x);
28695 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28704 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28705 whose ONE_VAR element is VAR, and whose other elements are zero.  Return true if successful.  */
28709 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28710 rtx target, rtx var, int one_var)
28712 enum machine_mode vsimode;
28715 bool use_vector_set = false;
28720 /* For SSE4.1, we normally use vector set. But if the second
28721 element is zero and inter-unit moves are OK, we use movq instead.  */
28723 use_vector_set = (TARGET_64BIT
28725 && !(TARGET_INTER_UNIT_MOVES
28731 use_vector_set = TARGET_SSE4_1;
28734 use_vector_set = TARGET_SSE2;
28737 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28744 use_vector_set = TARGET_AVX;
28747 /* Use ix86_expand_vector_set in 64bit mode only. */
28748 use_vector_set = TARGET_AVX && TARGET_64BIT;
28754 if (use_vector_set)
28756 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28757 var = force_reg (GET_MODE_INNER (mode), var);
28758 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28774 var = force_reg (GET_MODE_INNER (mode), var);
28775 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28776 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28781 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28782 new_target = gen_reg_rtx (mode);
28784 new_target = target;
28785 var = force_reg (GET_MODE_INNER (mode), var);
28786 x = gen_rtx_VEC_DUPLICATE (mode, var);
28787 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28788 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28791 /* We need to shuffle the value to the correct position, so
28792 create a new pseudo to store the intermediate result. */
28794 /* With SSE2, we can use the integer shuffle insns. */
28795 if (mode != V4SFmode && TARGET_SSE2)
28797 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28799 GEN_INT (one_var == 1 ? 0 : 1),
28800 GEN_INT (one_var == 2 ? 0 : 1),
28801 GEN_INT (one_var == 3 ? 0 : 1)));
28802 if (target != new_target)
28803 emit_move_insn (target, new_target);
28807 /* Otherwise convert the intermediate result to V4SFmode and
28808 use the SSE1 shuffle instructions. */
28809 if (mode != V4SFmode)
28811 tmp = gen_reg_rtx (V4SFmode);
28812 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28817 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28819 GEN_INT (one_var == 1 ? 0 : 1),
28820 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28821 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28823 if (mode != V4SFmode)
28824 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28825 else if (tmp != target)
28826 emit_move_insn (target, tmp);
28828 else if (target != new_target)
28829 emit_move_insn (target, new_target);
28834 vsimode = V4SImode;
28840 vsimode = V2SImode;
28846 /* Zero extend the variable element to SImode and recurse. */
28847 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28849 x = gen_reg_rtx (vsimode);
28850 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28852 gcc_unreachable ();
28854 emit_move_insn (target, gen_lowpart (mode, x));
28862 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28863 consisting of the values in VALS. It is known that all elements
28864 except ONE_VAR are constants. Return true if successful. */
28867 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28868 rtx target, rtx vals, int one_var)
28870 rtx var = XVECEXP (vals, 0, one_var);
28871 enum machine_mode wmode;
28874 const_vec = copy_rtx (vals);
28875 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28876 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28884 /* For the two element vectors, it's just as easy to use
28885 the general case. */
28889 /* Use ix86_expand_vector_set in 64bit mode only. */
28912 /* There's no way to set one QImode entry easily. Combine
28913 the variable value with its adjacent constant value, and
28914 promote to an HImode set. */
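/* Illustrative example: to set byte 5 of a V16QImode vector, bytes 4
   and 5 are fused -- the variable byte shifted into the high half, the
   constant neighbour in the low half -- and the result is stored as
   HImode element 5 >> 1 == 2 of the V8HImode view.  */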
28915 x = XVECEXP (vals, 0, one_var ^ 1);
28918 var = convert_modes (HImode, QImode, var, true);
28919 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28920 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28921 x = GEN_INT (INTVAL (x) & 0xff);
28925 var = convert_modes (HImode, QImode, var, true);
28926 x = gen_int_mode (INTVAL (x) << 8, HImode);
28928 if (x != const0_rtx)
28929 var = expand_simple_binop (HImode, IOR, var, x, var,
28930 1, OPTAB_LIB_WIDEN);
28932 x = gen_reg_rtx (wmode);
28933 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28934 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28936 emit_move_insn (target, gen_lowpart (mode, x));
28943 emit_move_insn (target, const_vec);
28944 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28948 /* A subroutine of ix86_expand_vector_init_general. Use vector
28949 concatenate to handle the most general case: all values variable,
28950 and none identical. */
28953 ix86_expand_vector_init_concat (enum machine_mode mode,
28954 rtx target, rtx *ops, int n)
28956 enum machine_mode cmode, hmode = VOIDmode;
28957 rtx first[8], second[4];
28997 gcc_unreachable ();
29000 if (!register_operand (ops[1], cmode))
29001 ops[1] = force_reg (cmode, ops[1]);
29002 if (!register_operand (ops[0], cmode))
29003 ops[0] = force_reg (cmode, ops[0]);
29004 emit_insn (gen_rtx_SET (VOIDmode, target,
29005 gen_rtx_VEC_CONCAT (mode, ops[0],
29025 gcc_unreachable ();
29041 gcc_unreachable ();
29046 /* FIXME: We process inputs backward to help RA. PR 36222. */
29049 for (; i > 0; i -= 2, j--)
29051 first[j] = gen_reg_rtx (cmode);
29052 v = gen_rtvec (2, ops[i - 1], ops[i]);
29053 ix86_expand_vector_init (false, first[j],
29054 gen_rtx_PARALLEL (cmode, v));
29060 gcc_assert (hmode != VOIDmode);
29061 for (i = j = 0; i < n; i += 2, j++)
29063 second[j] = gen_reg_rtx (hmode);
29064 ix86_expand_vector_init_concat (hmode, second [j],
29068 ix86_expand_vector_init_concat (mode, target, second, n);
29071 ix86_expand_vector_init_concat (mode, target, first, n);
29075 gcc_unreachable ();
29079 /* A subroutine of ix86_expand_vector_init_general. Use vector
29080 interleave to handle the most general case: all values variable,
29081 and none identical. */
29084 ix86_expand_vector_init_interleave (enum machine_mode mode,
29085 rtx target, rtx *ops, int n)
29087 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29090 rtx (*gen_load_even) (rtx, rtx, rtx);
29091 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29092 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29097 gen_load_even = gen_vec_setv8hi;
29098 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29099 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29100 inner_mode = HImode;
29101 first_imode = V4SImode;
29102 second_imode = V2DImode;
29103 third_imode = VOIDmode;
29106 gen_load_even = gen_vec_setv16qi;
29107 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29108 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29109 inner_mode = QImode;
29110 first_imode = V8HImode;
29111 second_imode = V4SImode;
29112 third_imode = V2DImode;
29115 gcc_unreachable ();
29118 for (i = 0; i < n; i++)
29120 /* Extend the odd element to SImode using a paradoxical SUBREG.  */
29121 op0 = gen_reg_rtx (SImode);
29122 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29124 /* Insert the SImode value as the low element of a V4SImode vector.  */
29125 op1 = gen_reg_rtx (V4SImode);
29126 op0 = gen_rtx_VEC_MERGE (V4SImode,
29127 gen_rtx_VEC_DUPLICATE (V4SImode,
29129 CONST0_RTX (V4SImode),
29131 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
29133 /* Cast the V4SImode vector back to a vector in the original mode.  */
29134 op0 = gen_reg_rtx (mode);
29135 emit_move_insn (op0, gen_lowpart (mode, op1));
29137 /* Load the even elements into the second position.  */
29138 emit_insn (gen_load_even (op0,
29139 force_reg (inner_mode,
29143 /* Cast vector to FIRST_IMODE vector. */
29144 ops[i] = gen_reg_rtx (first_imode);
29145 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29148 /* Interleave low FIRST_IMODE vectors. */
29149 for (i = j = 0; i < n; i += 2, j++)
29151 op0 = gen_reg_rtx (first_imode);
29152 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29154 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29155 ops[j] = gen_reg_rtx (second_imode);
29156 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29159 /* Interleave low SECOND_IMODE vectors. */
29160 switch (second_imode)
29163 for (i = j = 0; i < n / 2; i += 2, j++)
29165 op0 = gen_reg_rtx (second_imode);
29166 emit_insn (gen_interleave_second_low (op0, ops[i],
29169 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
29171 ops[j] = gen_reg_rtx (third_imode);
29172 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29174 second_imode = V2DImode;
29175 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29179 op0 = gen_reg_rtx (second_imode);
29180 emit_insn (gen_interleave_second_low (op0, ops[0],
29183 /* Cast the SECOND_IMODE vector back to a vector in the original mode.  */
29185 emit_insn (gen_rtx_SET (VOIDmode, target,
29186 gen_lowpart (mode, op0)));
29190 gcc_unreachable ();
29194 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29195 all values variable, and none identical. */
29198 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29199 rtx target, rtx vals)
29201 rtx ops[32], op0, op1;
29202 enum machine_mode half_mode = VOIDmode;
29209 if (!mmx_ok && !TARGET_SSE)
29221 n = GET_MODE_NUNITS (mode);
29222 for (i = 0; i < n; i++)
29223 ops[i] = XVECEXP (vals, 0, i);
29224 ix86_expand_vector_init_concat (mode, target, ops, n);
29228 half_mode = V16QImode;
29232 half_mode = V8HImode;
29236 n = GET_MODE_NUNITS (mode);
29237 for (i = 0; i < n; i++)
29238 ops[i] = XVECEXP (vals, 0, i);
29239 op0 = gen_reg_rtx (half_mode);
29240 op1 = gen_reg_rtx (half_mode);
29241 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29243 ix86_expand_vector_init_interleave (half_mode, op1,
29244 &ops [n >> 1], n >> 2);
29245 emit_insn (gen_rtx_SET (VOIDmode, target,
29246 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29250 if (!TARGET_SSE4_1)
29258 /* Don't use ix86_expand_vector_init_interleave if we can't
29259 move from GPR to SSE register directly. */
29260 if (!TARGET_INTER_UNIT_MOVES)
29263 n = GET_MODE_NUNITS (mode);
29264 for (i = 0; i < n; i++)
29265 ops[i] = XVECEXP (vals, 0, i);
29266 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29274 gcc_unreachable ();
29278 int i, j, n_elts, n_words, n_elt_per_word;
29279 enum machine_mode inner_mode;
29280 rtx words[4], shift;
29282 inner_mode = GET_MODE_INNER (mode);
29283 n_elts = GET_MODE_NUNITS (mode);
29284 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29285 n_elt_per_word = n_elts / n_words;
29286 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29288 for (i = 0; i < n_words; ++i)
29290 rtx word = NULL_RTX;
29292 for (j = 0; j < n_elt_per_word; ++j)
29294 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29295 elt = convert_modes (word_mode, inner_mode, elt, true);
29301 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29302 word, 1, OPTAB_LIB_WIDEN);
29303 word = expand_simple_binop (word_mode, IOR, word, elt,
29304 word, 1, OPTAB_LIB_WIDEN);
29312 emit_move_insn (target, gen_lowpart (mode, words[0]));
29313 else if (n_words == 2)
29315 rtx tmp = gen_reg_rtx (mode);
29316 emit_clobber (tmp);
29317 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29318 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29319 emit_move_insn (target, tmp);
29321 else if (n_words == 4)
29323 rtx tmp = gen_reg_rtx (V4SImode);
29324 gcc_assert (word_mode == SImode);
29325 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29326 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29327 emit_move_insn (target, gen_lowpart (mode, tmp));
29330 gcc_unreachable ();
29334 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29335 instructions unless MMX_OK is true. */
29338 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29340 enum machine_mode mode = GET_MODE (target);
29341 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29342 int n_elts = GET_MODE_NUNITS (mode);
29343 int n_var = 0, one_var = -1;
29344 bool all_same = true, all_const_zero = true;
29348 for (i = 0; i < n_elts; ++i)
29350 x = XVECEXP (vals, 0, i);
29351 if (!(CONST_INT_P (x)
29352 || GET_CODE (x) == CONST_DOUBLE
29353 || GET_CODE (x) == CONST_FIXED))
29354 n_var++, one_var = i;
29355 else if (x != CONST0_RTX (inner_mode))
29356 all_const_zero = false;
29357 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29361 /* Constants are best loaded from the constant pool. */
29364 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29368 /* If all values are identical, broadcast the value. */
29370 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29371 XVECEXP (vals, 0, 0)))
29374 /* Values where only one field is non-constant are best loaded from
29375 the pool and overwritten via move later. */
29379 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29380 XVECEXP (vals, 0, one_var),
29384 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29388 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29392 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29394 enum machine_mode mode = GET_MODE (target);
29395 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29396 enum machine_mode half_mode;
29397 bool use_vec_merge = false;
29399 static rtx (*gen_extract[6][2]) (rtx, rtx)
29401 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29402 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29403 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29404 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29405 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29406 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29408 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29410 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29411 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29412 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29413 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29414 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29415 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29425 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29426 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29428 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29430 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29431 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29437 use_vec_merge = TARGET_SSE4_1;
29445 /* For the two element vectors, we implement a VEC_CONCAT with
29446 the extraction of the other element. */
29448 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29449 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29452 op0 = val, op1 = tmp;
29454 op0 = tmp, op1 = val;
29456 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29457 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29462 use_vec_merge = TARGET_SSE4_1;
29469 use_vec_merge = true;
29473 /* tmp = target = A B C D */
29474 tmp = copy_to_reg (target);
29475 /* target = A A B B */
29476 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29477 /* target = X A B B */
29478 ix86_expand_vector_set (false, target, val, 0);
29479 /* target = A X C D */
29480 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29481 const1_rtx, const0_rtx,
29482 GEN_INT (2+4), GEN_INT (3+4)));
29486 /* tmp = target = A B C D */
29487 tmp = copy_to_reg (target);
29488 /* tmp = X B C D */
29489 ix86_expand_vector_set (false, tmp, val, 0);
29490 /* target = A B X D */
29491 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29492 const0_rtx, const1_rtx,
29493 GEN_INT (0+4), GEN_INT (3+4)));
29497 /* tmp = target = A B C D */
29498 tmp = copy_to_reg (target);
29499 /* tmp = X B C D */
29500 ix86_expand_vector_set (false, tmp, val, 0);
29501 /* target = A B X D */
29502 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29503 const0_rtx, const1_rtx,
29504 GEN_INT (2+4), GEN_INT (0+4)));
29508 gcc_unreachable ();
29513 use_vec_merge = TARGET_SSE4_1;
29517 /* Element 0 handled by vec_merge below. */
29520 use_vec_merge = true;
29526 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29527 store into element 0, then shuffle them back. */
29531 order[0] = GEN_INT (elt);
29532 order[1] = const1_rtx;
29533 order[2] = const2_rtx;
29534 order[3] = GEN_INT (3);
29535 order[elt] = const0_rtx;
29537 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29538 order[1], order[2], order[3]));
29540 ix86_expand_vector_set (false, target, val, 0);
29542 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29543 order[1], order[2], order[3]));
29547 /* For SSE1, we have to reuse the V4SF code. */
29548 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29549 gen_lowpart (SFmode, val), elt);
29554 use_vec_merge = TARGET_SSE2;
29557 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29561 use_vec_merge = TARGET_SSE4_1;
29568 half_mode = V16QImode;
29574 half_mode = V8HImode;
29580 half_mode = V4SImode;
29586 half_mode = V2DImode;
29592 half_mode = V4SFmode;
29598 half_mode = V2DFmode;
29604 /* Compute offset. */
29608 gcc_assert (i <= 1);
29610 /* Extract the half. */
29611 tmp = gen_reg_rtx (half_mode);
29612 emit_insn (gen_extract[j][i] (tmp, target));
29614 /* Put val in tmp at elt. */
29615 ix86_expand_vector_set (false, tmp, val, elt);
29618 emit_insn (gen_insert[j][i] (target, target, tmp));
29627 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29628 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29629 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29633 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29635 emit_move_insn (mem, target);
29637 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29638 emit_move_insn (tmp, val);
29640 emit_move_insn (target, mem);
29645 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29647 enum machine_mode mode = GET_MODE (vec);
29648 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29649 bool use_vec_extr = false;
29662 use_vec_extr = true;
29666 use_vec_extr = TARGET_SSE4_1;
29678 tmp = gen_reg_rtx (mode);
29679 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29680 GEN_INT (elt), GEN_INT (elt),
29681 GEN_INT (elt+4), GEN_INT (elt+4)));
29685 tmp = gen_reg_rtx (mode);
29686 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29690 gcc_unreachable ();
29693 use_vec_extr = true;
29698 use_vec_extr = TARGET_SSE4_1;
29712 tmp = gen_reg_rtx (mode);
29713 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29714 GEN_INT (elt), GEN_INT (elt),
29715 GEN_INT (elt), GEN_INT (elt)));
29719 tmp = gen_reg_rtx (mode);
29720 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29724 gcc_unreachable ();
29727 use_vec_extr = true;
29732 /* For SSE1, we have to reuse the V4SF code. */
29733 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29734 gen_lowpart (V4SFmode, vec), elt);
29740 use_vec_extr = TARGET_SSE2;
29743 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29747 use_vec_extr = TARGET_SSE4_1;
29751 /* ??? Could extract the appropriate HImode element and shift. */
29758 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29759 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29761 /* Let the rtl optimizers know about the zero extension performed. */
29762 if (inner_mode == QImode || inner_mode == HImode)
29764 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29765 target = gen_lowpart (SImode, target);
29768 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29772 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29774 emit_move_insn (mem, vec);
29776 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29777 emit_move_insn (target, tmp);
29781 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29782 pattern to reduce; DEST is the destination; IN is the input vector. */
29785 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29787 rtx tmp1, tmp2, tmp3;
29789 tmp1 = gen_reg_rtx (V4SFmode);
29790 tmp2 = gen_reg_rtx (V4SFmode);
29791 tmp3 = gen_reg_rtx (V4SFmode);
29793 emit_insn (gen_sse_movhlps (tmp1, in, in));
29794 emit_insn (fn (tmp2, tmp1, in));
29796 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29797 const1_rtx, const1_rtx,
29798 GEN_INT (1+4), GEN_INT (1+4)));
29799 emit_insn (fn (dest, tmp2, tmp3));
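/* Dataflow sketch for IN = [a b c d], writing "op" for FN's operation:
   movhlps gives [c d c d]; the first FN yields [a op c, b op d, ...];
   shufps broadcasts element 1 (b op d); the final FN leaves the full
   reduction in element 0 of DEST.  */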
29802 /* Target hook for scalar_mode_supported_p. */
29804 ix86_scalar_mode_supported_p (enum machine_mode mode)
29806 if (DECIMAL_FLOAT_MODE_P (mode))
29807 return default_decimal_float_supported_p ();
29808 else if (mode == TFmode)
29811 return default_scalar_mode_supported_p (mode);
29814 /* Implements target hook vector_mode_supported_p. */
29816 ix86_vector_mode_supported_p (enum machine_mode mode)
29818 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29820 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29822 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29824 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29826 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29831 /* Target hook for c_mode_for_suffix. */
29832 static enum machine_mode
29833 ix86_c_mode_for_suffix (char suffix)
29843 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29845 We do this in the new i386 backend to maintain source compatibility
29846 with the old cc0-based compiler. */
29849 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29850 tree inputs ATTRIBUTE_UNUSED,
29853 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29855 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29860 /* Implements the target hook targetm.asm.encode_section_info.  This
29861 is not used by NetWare.  */
29863 static void ATTRIBUTE_UNUSED
29864 ix86_encode_section_info (tree decl, rtx rtl, int first)
29866 default_encode_section_info (decl, rtl, first);
29868 if (TREE_CODE (decl) == VAR_DECL
29869 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29870 && ix86_in_large_data_p (decl))
29871 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29874 /* Worker function for REVERSE_CONDITION. */
29877 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29879 return (mode != CCFPmode && mode != CCFPUmode
29880 ? reverse_condition (code)
29881 : reverse_condition_maybe_unordered (code));
29884 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0].  */
29888 output_387_reg_move (rtx insn, rtx *operands)
29890 if (REG_P (operands[0]))
29892 if (REG_P (operands[1])
29893 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29895 if (REGNO (operands[0]) == FIRST_STACK_REG)
29896 return output_387_ffreep (operands, 0);
29897 return "fstp\t%y0";
29899 if (STACK_TOP_P (operands[0]))
29900 return "fld%Z1\t%y1";
29903 else if (MEM_P (operands[0]))
29905 gcc_assert (REG_P (operands[1]));
29906 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29907 return "fstp%Z0\t%y0";
29910 /* There is no non-popping store to memory for XFmode.
29911 So if we need one, follow the store with a load. */
29912 if (GET_MODE (operands[0]) == XFmode)
29913 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29915 return "fst%Z0\t%y0";
29922 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
29923 the FP status register is set.  */
29926 ix86_emit_fp_unordered_jump (rtx label)
29928 rtx reg = gen_reg_rtx (HImode);
29931 emit_insn (gen_x86_fnstsw_1 (reg));
29933 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29935 emit_insn (gen_x86_sahf_1 (reg));
29937 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29938 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29942 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29944 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29945 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29948 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29949 gen_rtx_LABEL_REF (VOIDmode, label),
29951 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29953 emit_jump_insn (temp);
29954 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29957 /* Output code to perform a log1p XFmode calculation. */
29959 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29961 rtx label1 = gen_label_rtx ();
29962 rtx label2 = gen_label_rtx ();
29964 rtx tmp = gen_reg_rtx (XFmode);
29965 rtx tmp2 = gen_reg_rtx (XFmode);
29968 emit_insn (gen_absxf2 (tmp, op1));
29969 test = gen_rtx_GE (VOIDmode, tmp,
29970 CONST_DOUBLE_FROM_REAL_VALUE (
29971 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29973 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
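/* The threshold above is 1 - sqrt(2)/2: fyl2xp1 is only specified for
   arguments with |x| below that bound, so larger values take the fyl2x
   path on 1 + x instead.  */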
29975 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29976 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29977 emit_jump (label2);
29979 emit_label (label1);
29980 emit_move_insn (tmp, CONST1_RTX (XFmode));
29981 emit_insn (gen_addxf3 (tmp, op1, tmp));
29982 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29983 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29985 emit_label (label2);
29988 /* Output code to perform a Newton-Raphson approximation of a single-precision
29989 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29991 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29993 rtx x0, x1, e0, e1, two;
29995 x0 = gen_reg_rtx (mode);
29996 e0 = gen_reg_rtx (mode);
29997 e1 = gen_reg_rtx (mode);
29998 x1 = gen_reg_rtx (mode);
30000 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30002 if (VECTOR_MODE_P (mode))
30003 two = ix86_build_const_vector (SFmode, true, two);
30005 two = force_reg (mode, two);
30007 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
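/* This is one Newton-Raphson step for the reciprocal: with seed
   x0 = rcp(b), the refinement x1 = x0 * (2.0 - b * x0) roughly doubles
   the number of correct bits.  The expansion below folds the multiply
   by A in early (e0 = a * x0), so the result is e0 * (2.0 - b * x0).  */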
30009 /* x0 = rcp(b) estimate */
30010 emit_insn (gen_rtx_SET (VOIDmode, x0,
30011 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30014 emit_insn (gen_rtx_SET (VOIDmode, e0,
30015 gen_rtx_MULT (mode, x0, a)));
30017 emit_insn (gen_rtx_SET (VOIDmode, e1,
30018 gen_rtx_MULT (mode, x0, b)));
30020 emit_insn (gen_rtx_SET (VOIDmode, x1,
30021 gen_rtx_MINUS (mode, two, e1)));
30022 /* res = e0 * x1 */
30023 emit_insn (gen_rtx_SET (VOIDmode, res,
30024 gen_rtx_MULT (mode, e0, x1)));
30027 /* Output code to perform a Newton-Raphson approximation of a
30028 single-precision floating point [reciprocal] square root.  */
30030 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
30033 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30036 x0 = gen_reg_rtx (mode);
30037 e0 = gen_reg_rtx (mode);
30038 e1 = gen_reg_rtx (mode);
30039 e2 = gen_reg_rtx (mode);
30040 e3 = gen_reg_rtx (mode);
30042 real_from_integer (&r, VOIDmode, -3, -1, 0);
30043 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30045 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30046 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30048 if (VECTOR_MODE_P (mode))
30050 mthree = ix86_build_const_vector (SFmode, true, mthree);
30051 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
30054 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30055 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
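/* Both forms are one Newton-Raphson step for f (x) = 1/(x*x) - a: with
   x0 = rsqrtss (a), the refined value is x0 * (3 - a * x0 * x0) / 2,
   written here with the signs folded into the -0.5 and -3.0 constants
   so that only multiplies and adds are needed.  */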
30057 /* x0 = rsqrt(a) estimate */
30058 emit_insn (gen_rtx_SET (VOIDmode, x0,
30059 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
/* If a == 0.0, the rsqrt estimate is infinity; mask the estimate to zero
   in that case so that the sqrt path computes sqrt (0.0) = 0.0 instead
   of 0.0 * inf = NaN.  */
30067 zero = gen_reg_rtx (mode);
30068 mask = gen_reg_rtx (mode);
30070 zero = force_reg (mode, CONST0_RTX(mode));
30071 emit_insn (gen_rtx_SET (VOIDmode, mask,
30072 gen_rtx_NE (mode, zero, a)));
30074 emit_insn (gen_rtx_SET (VOIDmode, x0,
30075 gen_rtx_AND (mode, x0, mask)));
30079 emit_insn (gen_rtx_SET (VOIDmode, e0,
30080 gen_rtx_MULT (mode, x0, a)));
30082 emit_insn (gen_rtx_SET (VOIDmode, e1,
30083 gen_rtx_MULT (mode, e0, x0)));
30086 mthree = force_reg (mode, mthree);
30087 emit_insn (gen_rtx_SET (VOIDmode, e2,
30088 gen_rtx_PLUS (mode, e1, mthree)));
30090 mhalf = force_reg (mode, mhalf);
30092 /* e3 = -.5 * x0 */
30093 emit_insn (gen_rtx_SET (VOIDmode, e3,
30094 gen_rtx_MULT (mode, x0, mhalf)));
30096 /* e3 = -.5 * e0 */
30097 emit_insn (gen_rtx_SET (VOIDmode, e3,
30098 gen_rtx_MULT (mode, e0, mhalf)));
30099 /* ret = e2 * e3 */
30100 emit_insn (gen_rtx_SET (VOIDmode, res,
30101 gen_rtx_MULT (mode, e2, e3)));
30104 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30106 static void ATTRIBUTE_UNUSED
30107 i386_solaris_elf_named_section (const char *name, unsigned int flags,
30110 /* With Binutils 2.15, the "@unwind" marker must be specified on
30111 every occurrence of the ".eh_frame" section, not just the first
30114 && strcmp (name, ".eh_frame") == 0)
30116 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30117 flags & SECTION_WRITE ? "aw" : "a");
30120 default_elf_asm_named_section (name, flags, decl);
30123 /* Return the mangling of TYPE if it is an extended fundamental type. */
30125 static const char *
30126 ix86_mangle_type (const_tree type)
30128 type = TYPE_MAIN_VARIANT (type);
30130 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30131 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30134 switch (TYPE_MODE (type))
30137 /* __float128 is "g". */
30140 /* "long double" or __float80 is "e". */
30147 /* For 32-bit code we can save PIC register setup by using
30148 __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
30150 register, so it is better to call __stack_chk_fail directly. */
30153 ix86_stack_protect_fail (void)
30155 return TARGET_64BIT
30156 ? default_external_stack_protect_fail ()
30157 : default_hidden_stack_protect_fail ();
30160 /* Select a format to encode pointers in exception handling data. CODE
30161 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30162 true if the symbol may be affected by dynamic relocations.
30164 ??? All x86 object file formats are capable of representing this.
30165 After all, the relocation needed is the same as for the call insn.
30166 Whether or not a particular assembler allows us to enter such, I
30167 guess we'll have to see. */
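/* For example, 32-bit PIC code gets DW_EH_PE_pcrel | DW_EH_PE_sdata4,
   with DW_EH_PE_indirect added for preemptible symbols, while non-PIC
   small-model code can use plain DW_EH_PE_udata4.  */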
30169 asm_preferred_eh_data_format (int code, int global)
30173 int type = DW_EH_PE_sdata8;
30175 || ix86_cmodel == CM_SMALL_PIC
30176 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30177 type = DW_EH_PE_sdata4;
30178 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30180 if (ix86_cmodel == CM_SMALL
30181 || (ix86_cmodel == CM_MEDIUM && code))
30182 return DW_EH_PE_udata4;
30183 return DW_EH_PE_absptr;
30186 /* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
30190 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30192 enum machine_mode mode = GET_MODE (sign);
30193 rtx sgn = gen_reg_rtx (mode);
30194 if (mask == NULL_RTX)
30196 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
30197 if (!VECTOR_MODE_P (mode))
30199 /* We need to generate a scalar mode mask in this case. */
30200 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30201 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30202 mask = gen_reg_rtx (mode);
30203 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30207 mask = gen_rtx_NOT (mode, mask);
30208 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30209 gen_rtx_AND (mode, mask, sign)));
30210 emit_insn (gen_rtx_SET (VOIDmode, result,
30211 gen_rtx_IOR (mode, abs_value, sgn)));
30214 /* Expand fabs (OP0) and return a new rtx that holds the result. The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
30218 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30220 enum machine_mode mode = GET_MODE (op0);
30223 xa = gen_reg_rtx (mode);
30224 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
30225 if (!VECTOR_MODE_P (mode))
30227 /* We need to generate a scalar mode mask in this case. */
30228 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30229 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30230 mask = gen_reg_rtx (mode);
30231 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30233 emit_insn (gen_rtx_SET (VOIDmode, xa,
30234 gen_rtx_AND (mode, op0, mask)));
30242 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30243 swapping the operands if SWAP_OPERANDS is true. The expanded
30244 code is a forward jump to a newly created label in case the
30245 comparison is true. The generated label rtx is returned. */
30247 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30248 bool swap_operands)
30259 label = gen_label_rtx ();
30260 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30261 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30262 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30263 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30264 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30265 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30266 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30267 JUMP_LABEL (tmp) = label;
30272 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30273 using comparison code CODE. Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns an rtx for the generated mask.  */
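/* The generated mask is all-ones in each element for which the comparison
   holds and all-zeros elsewhere, so the callers below can AND it with a
   constant such as 1.0 to select that constant per element.  */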
30276 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30277 bool swap_operands)
30279 enum machine_mode mode = GET_MODE (op0);
30280 rtx mask = gen_reg_rtx (mode);
30289 if (mode == DFmode)
30290 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30291 gen_rtx_fmt_ee (code, mode, op0, op1)));
30293 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30294 gen_rtx_fmt_ee (code, mode, op0, op1)));
/* Generate and return an rtx of mode MODE for 2**n where n is the number
30300 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
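/* At magnitude 2**52 (2**23 for SFmode) the mantissa has no bits left
   for a fractional part, so for 0 <= x < 2**52 the sum x + 2**52 is x
   rounded to an integer in the current rounding mode.  This is what
   makes the "xa + TWO52 - TWO52" idiom in the expanders below work.  */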
30302 ix86_gen_TWO52 (enum machine_mode mode)
30304 REAL_VALUE_TYPE TWO52r;
30307 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30308 TWO52 = const_double_from_real_value (TWO52r, mode);
30309 TWO52 = force_reg (mode, TWO52);
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
30317 ix86_expand_lround (rtx op0, rtx op1)
30319 /* C code for the stuff we're doing below:
        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
        return (long) tmp;
   */
30323 enum machine_mode mode = GET_MODE (op1);
30324 const struct real_format *fmt;
30325 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30328 /* load nextafter (0.5, 0.0) */
30329 fmt = REAL_MODE_FORMAT (mode);
30330 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30331 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
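  /* Using the largest value strictly below 0.5 keeps the addition from
     rounding past a half-way point: e.g. for the double just below 0.5,
     adding an exact 0.5 would round to 1.0 and the truncation would
     wrongly yield 1 instead of 0.  */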
30333 /* adj = copysign (0.5, op1) */
30334 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30335 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30337 /* adj = op1 + adj */
30338 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30340 /* op0 = (imode)adj */
30341 expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lfloor or lceil (depending on
   DO_FLOOR) from OPERAND1, storing into OPERAND0.  */
30347 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
/* C code for the stuff we're doing below (for do_floor):
        xi = (long) op1;
        xi -= (double) xi > op1 ? 1 : 0;
        return xi;
   */
30354 enum machine_mode fmode = GET_MODE (op1);
30355 enum machine_mode imode = GET_MODE (op0);
30356 rtx ireg, freg, label, tmp;
30358 /* reg = (long)op1 */
30359 ireg = gen_reg_rtx (imode);
30360 expand_fix (ireg, op1, 0);
30362 /* freg = (double)reg */
30363 freg = gen_reg_rtx (fmode);
30364 expand_float (freg, ireg, 0);
30366 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30367 label = ix86_expand_sse_compare_and_jump (UNLE,
30368 freg, op1, !do_floor);
30369 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30370 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30371 emit_move_insn (ireg, tmp);
30373 emit_label (label);
30374 LABEL_NUSES (label) = 1;
30376 emit_move_insn (op0, ireg);
30379 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30380 result in OPERAND0. */
30382 ix86_expand_rint (rtx operand0, rtx operand1)
30384 /* C code for the stuff we're doing below:
30385 xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
30391 enum machine_mode mode = GET_MODE (operand0);
30392 rtx res, xa, label, TWO52, mask;
30394 res = gen_reg_rtx (mode);
30395 emit_move_insn (res, operand1);
30397 /* xa = abs (operand1) */
30398 xa = ix86_expand_sse_fabs (res, &mask);
30400 /* if (!isless (xa, TWO52)) goto label; */
30401 TWO52 = ix86_gen_TWO52 (mode);
30402 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30404 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30405 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30407 ix86_sse_copysign_to_positive (res, xa, res, mask);
30409 emit_label (label);
30410 LABEL_NUSES (label) = 1;
30412 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0, without relying on DImode truncation that needs a
   64-bit target.  */
30418 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30420 /* C code for the stuff we expand below.
30421 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Ceiling:
        if (x2 < x)
          x2 += 1;
        return x2;
   */
30434 enum machine_mode mode = GET_MODE (operand0);
30435 rtx xa, TWO52, tmp, label, one, res, mask;
30437 TWO52 = ix86_gen_TWO52 (mode);
30439 /* Temporary for holding the result, initialized to the input
30440 operand to ease control flow. */
30441 res = gen_reg_rtx (mode);
30442 emit_move_insn (res, operand1);
30444 /* xa = abs (operand1) */
30445 xa = ix86_expand_sse_fabs (res, &mask);
30447 /* if (!isless (xa, TWO52)) goto label; */
30448 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30450 /* xa = xa + TWO52 - TWO52; */
30451 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30452 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30454 /* xa = copysign (xa, operand1) */
30455 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30457 /* generate 1.0 or -1.0 */
30458 one = force_reg (mode,
30459 const_double_from_real_value (do_floor
30460 ? dconst1 : dconstm1, mode));
30462 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30463 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30464 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30465 gen_rtx_AND (mode, one, tmp)));
30466 /* We always need to subtract here to preserve signed zero. */
30467 tmp = expand_simple_binop (mode, MINUS,
30468 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30469 emit_move_insn (res, tmp);
30471 emit_label (label);
30472 LABEL_NUSES (label) = 1;
30474 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
30480 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30482 /* C code for the stuff we expand below.
30483 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Ceiling:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
30497 enum machine_mode mode = GET_MODE (operand0);
30498 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30500 TWO52 = ix86_gen_TWO52 (mode);
30502 /* Temporary for holding the result, initialized to the input
30503 operand to ease control flow. */
30504 res = gen_reg_rtx (mode);
30505 emit_move_insn (res, operand1);
30507 /* xa = abs (operand1) */
30508 xa = ix86_expand_sse_fabs (res, &mask);
30510 /* if (!isless (xa, TWO52)) goto label; */
30511 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30513 /* xa = (double)(long)x */
30514 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30515 expand_fix (xi, res, 0);
30516 expand_float (xa, xi, 0);
30519 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30521 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30522 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30523 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30524 gen_rtx_AND (mode, one, tmp)));
30525 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30526 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30527 emit_move_insn (res, tmp);
30529 if (HONOR_SIGNED_ZEROS (mode))
30530 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30532 emit_label (label);
30533 LABEL_NUSES (label) = 1;
30535 emit_move_insn (operand0, res);
30538 /* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
30542 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30544 /* C code for the stuff we expand below.
30545 double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
30560 enum machine_mode mode = GET_MODE (operand0);
30561 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30563 TWO52 = ix86_gen_TWO52 (mode);
30565 /* Temporary for holding the result, initialized to the input
30566 operand to ease control flow. */
30567 res = gen_reg_rtx (mode);
30568 emit_move_insn (res, operand1);
30570 /* xa = abs (operand1) */
30571 xa = ix86_expand_sse_fabs (res, &mask);
30573 /* if (!isless (xa, TWO52)) goto label; */
30574 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30576 /* xa2 = xa + TWO52 - TWO52; */
30577 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30578 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30580 /* dxa = xa2 - xa; */
30581 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30583 /* generate 0.5, 1.0 and -0.5 */
30584 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30585 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30586 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30590 tmp = gen_reg_rtx (mode);
30591 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30592 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30593 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30594 gen_rtx_AND (mode, one, tmp)));
30595 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30596 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30597 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30598 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30599 gen_rtx_AND (mode, one, tmp)));
30600 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30602 /* res = copysign (xa2, operand1) */
30603 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30605 emit_label (label);
30606 LABEL_NUSES (label) = 1;
30608 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
30614 ix86_expand_trunc (rtx operand0, rtx operand1)
30616 /* C code for SSE variant we expand below.
30617 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
30625 enum machine_mode mode = GET_MODE (operand0);
30626 rtx xa, xi, TWO52, label, res, mask;
30628 TWO52 = ix86_gen_TWO52 (mode);
30630 /* Temporary for holding the result, initialized to the input
30631 operand to ease control flow. */
30632 res = gen_reg_rtx (mode);
30633 emit_move_insn (res, operand1);
30635 /* xa = abs (operand1) */
30636 xa = ix86_expand_sse_fabs (res, &mask);
30638 /* if (!isless (xa, TWO52)) goto label; */
30639 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30641 /* x = (double)(long)x */
30642 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30643 expand_fix (xi, res, 0);
30644 expand_float (res, xi, 0);
30646 if (HONOR_SIGNED_ZEROS (mode))
30647 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30649 emit_label (label);
30650 LABEL_NUSES (label) = 1;
30652 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0, without relying on DImode truncation that needs a
   64-bit target.  */
30658 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30660 enum machine_mode mode = GET_MODE (operand0);
30661 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30663 /* C code for SSE variant we expand below.
30664 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */
30675 TWO52 = ix86_gen_TWO52 (mode);
30677 /* Temporary for holding the result, initialized to the input
30678 operand to ease control flow. */
30679 res = gen_reg_rtx (mode);
30680 emit_move_insn (res, operand1);
30682 /* xa = abs (operand1) */
30683 xa = ix86_expand_sse_fabs (res, &smask);
30685 /* if (!isless (xa, TWO52)) goto label; */
30686 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30688 /* res = xa + TWO52 - TWO52; */
30689 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30690 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30691 emit_move_insn (res, tmp);
30694 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30696 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30697 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30698 emit_insn (gen_rtx_SET (VOIDmode, mask,
30699 gen_rtx_AND (mode, mask, one)));
30700 tmp = expand_simple_binop (mode, MINUS,
30701 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30702 emit_move_insn (res, tmp);
30704 /* res = copysign (res, operand1) */
30705 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30707 emit_label (label);
30708 LABEL_NUSES (label) = 1;
30710 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
30716 ix86_expand_round (rtx operand0, rtx operand1)
30718 /* C code for the stuff we're doing below:
30719 double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
30725 enum machine_mode mode = GET_MODE (operand0);
30726 rtx res, TWO52, xa, label, xi, half, mask;
30727 const struct real_format *fmt;
30728 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30730 /* Temporary for holding the result, initialized to the input
30731 operand to ease control flow. */
30732 res = gen_reg_rtx (mode);
30733 emit_move_insn (res, operand1);
30735 TWO52 = ix86_gen_TWO52 (mode);
30736 xa = ix86_expand_sse_fabs (res, &mask);
30737 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30739 /* load nextafter (0.5, 0.0) */
30740 fmt = REAL_MODE_FORMAT (mode);
30741 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30742 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30744 /* xa = xa + 0.5 */
30745 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30746 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30748 /* xa = (double)(int64_t)xa */
30749 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30750 expand_fix (xi, xa, 0);
30751 expand_float (xa, xi, 0);
30753 /* res = copysign (xa, operand1) */
30754 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30756 emit_label (label);
30757 LABEL_NUSES (label) = 1;
30759 emit_move_insn (operand0, res);
30763 /* Table of valid machine attributes. */
30764 static const struct attribute_spec ix86_attribute_table[] =
30766 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30767 /* Stdcall attribute says callee is responsible for popping arguments
30768 if they are not variable. */
30769 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30770 /* Fastcall attribute says callee is responsible for popping arguments
30771 if they are not variable. */
30772 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30773 /* Thiscall attribute says callee is responsible for popping arguments
30774 if they are not variable. */
30775 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30776 /* Cdecl attribute says the callee is a normal C declaration */
30777 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30778 /* Regparm attribute specifies how many integer arguments are to be
30779 passed in registers. */
30780 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30781 /* Sseregparm attribute says we are using x86_64 calling conventions
30782 for FP arguments. */
30783 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30784 /* force_align_arg_pointer says this function realigns the stack at entry. */
30785 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30786 false, true, true, ix86_handle_cconv_attribute },
30787 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30788 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30789 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30790 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30792 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30793 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30794 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30795 SUBTARGET_ATTRIBUTE_TABLE,
30797 /* ms_abi and sysv_abi calling convention function attributes. */
30798 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30799 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30800 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30802 { NULL, 0, 0, false, false, false, NULL }
30805 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30807 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30808 tree vectype ATTRIBUTE_UNUSED,
30809 int misalign ATTRIBUTE_UNUSED)
30811 switch (type_of_cost)
30814 return ix86_cost->scalar_stmt_cost;
30817 return ix86_cost->scalar_load_cost;
30820 return ix86_cost->scalar_store_cost;
30823 return ix86_cost->vec_stmt_cost;
30826 return ix86_cost->vec_align_load_cost;
30829 return ix86_cost->vec_store_cost;
30831 case vec_to_scalar:
30832 return ix86_cost->vec_to_scalar_cost;
30834 case scalar_to_vec:
30835 return ix86_cost->scalar_to_vec_cost;
30837 case unaligned_load:
30838 case unaligned_store:
30839 return ix86_cost->vec_unalign_load_cost;
30841 case cond_branch_taken:
30842 return ix86_cost->cond_taken_branch_cost;
30844 case cond_branch_not_taken:
30845 return ix86_cost->cond_not_taken_branch_cost;
30851 gcc_unreachable ();
30856 /* Implement targetm.vectorize.builtin_vec_perm. */
30859 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30861 tree itype = TREE_TYPE (vec_type);
30862 bool u = TYPE_UNSIGNED (itype);
30863 enum machine_mode vmode = TYPE_MODE (vec_type);
30864 enum ix86_builtins fcode;
30865 bool ok = TARGET_SSE2;
30871 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30874 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30876 itype = ix86_get_builtin_type (IX86_BT_DI);
30881 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30885 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30887 itype = ix86_get_builtin_type (IX86_BT_SI);
30891 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30894 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30897 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30900 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30910 *mask_type = itype;
30911 return ix86_builtins[(int) fcode];
30914 /* Return a vector mode with twice as many elements as VMODE. */
30915 /* ??? Consider moving this to a table generated by genmodes.c. */
30917 static enum machine_mode
30918 doublesize_vector_mode (enum machine_mode vmode)
30922 case V2SFmode: return V4SFmode;
30923 case V1DImode: return V2DImode;
30924 case V2SImode: return V4SImode;
30925 case V4HImode: return V8HImode;
30926 case V8QImode: return V16QImode;
30928 case V2DFmode: return V4DFmode;
30929 case V4SFmode: return V8SFmode;
30930 case V2DImode: return V4DImode;
30931 case V4SImode: return V8SImode;
30932 case V8HImode: return V16HImode;
30933 case V16QImode: return V32QImode;
30935 case V4DFmode: return V8DFmode;
30936 case V8SFmode: return V16SFmode;
30937 case V4DImode: return V8DImode;
30938 case V8SImode: return V16SImode;
30939 case V16HImode: return V32HImode;
30940 case V32QImode: return V64QImode;
30943 gcc_unreachable ();
30947 /* Construct (set target (vec_select op0 (parallel perm))) and
30948 return true if that's a valid instruction in the active ISA. */
30951 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30953 rtx rperm[MAX_VECT_LEN], x;
30956 for (i = 0; i < nelt; ++i)
30957 rperm[i] = GEN_INT (perm[i]);
30959 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30960 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30961 x = gen_rtx_SET (VOIDmode, target, x);
30964 if (recog_memoized (x) < 0)
30972 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30975 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30976 const unsigned char *perm, unsigned nelt)
30978 enum machine_mode v2mode;
30981 v2mode = doublesize_vector_mode (GET_MODE (op0));
30982 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30983 return expand_vselect (target, x, perm, nelt);
30986 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30987 in terms of blendp[sd] / pblendw / pblendvb. */
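/* For example, the V4SF permutation { 0, 5, 2, 7 } takes elements 0 and 2
   from op0 and elements 1 and 3 from op1; it maps to blendps with
   immediate mask 0b1010, each set bit selecting op1 for that element.  */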
30990 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30992 enum machine_mode vmode = d->vmode;
30993 unsigned i, mask, nelt = d->nelt;
30994 rtx target, op0, op1, x;
30996 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30998 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
31001 /* This is a blend, not a permute. Elements must stay in their
31002 respective lanes. */
31003 for (i = 0; i < nelt; ++i)
31005 unsigned e = d->perm[i];
31006 if (!(e == i || e == i + nelt))
31013 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31014 decision should be extracted elsewhere, so that we only try that
31015 sequence once all budget==3 options have been tried. */
31017 /* For bytes, see if bytes move in pairs so we can use pblendw with
31018 an immediate argument, rather than pblendvb with a vector argument. */
31019 if (vmode == V16QImode)
31021 bool pblendw_ok = true;
31022 for (i = 0; i < 16 && pblendw_ok; i += 2)
31023 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31027 rtx rperm[16], vperm;
31029 for (i = 0; i < nelt; ++i)
31030 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31032 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31033 vperm = force_reg (V16QImode, vperm);
31035 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31040 target = d->target;
31052 for (i = 0; i < nelt; ++i)
31053 mask |= (d->perm[i] >= nelt) << i;
31057 for (i = 0; i < 2; ++i)
31058 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31062 for (i = 0; i < 4; ++i)
31063 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31067 for (i = 0; i < 8; ++i)
31068 mask |= (d->perm[i * 2] >= 16) << i;
31072 target = gen_lowpart (vmode, target);
31073 op0 = gen_lowpart (vmode, op0);
31074 op1 = gen_lowpart (vmode, op1);
31078 gcc_unreachable ();
31081 /* This matches five different patterns with the different modes. */
31082 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31083 x = gen_rtx_SET (VOIDmode, target, x);
31089 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31090 in terms of the variable form of vpermilps.
31092 Note that we will have already failed the immediate input vpermilps,
31093 which requires that the high and low part shuffle be identical; the
31094 variable form doesn't require that. */
31097 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31099 rtx rperm[8], vperm;
31102 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31105 /* We can only permute within the 128-bit lane. */
31106 for (i = 0; i < 8; ++i)
31108 unsigned e = d->perm[i];
31109 if (i < 4 ? e >= 4 : e < 4)
31116 for (i = 0; i < 8; ++i)
31118 unsigned e = d->perm[i];
31120 /* Within each 128-bit lane, the elements of op0 are numbered
31121 from 0 and the elements of op1 are numbered from 4. */
31127 rperm[i] = GEN_INT (e);
31130 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31131 vperm = force_reg (V8SImode, vperm);
31132 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31137 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31138 in terms of pshufb or vpperm. */
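/* For example, a V4SI permutation expands each element index e into the
   four byte selectors 4e, 4e+1, 4e+2, 4e+3 of the control vector, since
   pshufb permutes at byte granularity.  */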
31141 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31143 unsigned i, nelt, eltsz;
31144 rtx rperm[16], vperm, target, op0, op1;
31146 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31148 if (GET_MODE_SIZE (d->vmode) != 16)
31155 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31157 for (i = 0; i < nelt; ++i)
31159 unsigned j, e = d->perm[i];
31160 for (j = 0; j < eltsz; ++j)
31161 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31164 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31165 vperm = force_reg (V16QImode, vperm);
31167 target = gen_lowpart (V16QImode, d->target);
31168 op0 = gen_lowpart (V16QImode, d->op0);
31169 if (d->op0 == d->op1)
31170 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31173 op1 = gen_lowpart (V16QImode, d->op1);
31174 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
31180 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31181 in a single instruction. */
31184 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31186 unsigned i, nelt = d->nelt;
31187 unsigned char perm2[MAX_VECT_LEN];
31189 /* Check plain VEC_SELECT first, because AVX has instructions that could
31190 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31191 input where SEL+CONCAT may not. */
31192 if (d->op0 == d->op1)
31194 int mask = nelt - 1;
31196 for (i = 0; i < nelt; i++)
31197 perm2[i] = d->perm[i] & mask;
31199 if (expand_vselect (d->target, d->op0, perm2, nelt))
31202 /* There are plenty of patterns in sse.md that are written for
31203 SEL+CONCAT and are not replicated for a single op. Perhaps
31204 that should be changed, to avoid the nastiness here. */
31206 /* Recognize interleave style patterns, which means incrementing
31207 every other permutation operand. */
31208 for (i = 0; i < nelt; i += 2)
31210 perm2[i] = d->perm[i] & mask;
31211 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31213 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31216 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31219 for (i = 0; i < nelt; i += 4)
31221 perm2[i + 0] = d->perm[i + 0] & mask;
31222 perm2[i + 1] = d->perm[i + 1] & mask;
31223 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31224 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31227 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31232 /* Finally, try the fully general two operand permute. */
31233 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31236 /* Recognize interleave style patterns with reversed operands. */
31237 if (d->op0 != d->op1)
31239 for (i = 0; i < nelt; ++i)
31241 unsigned e = d->perm[i];
31249 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31253 /* Try the SSE4.1 blend variable merge instructions. */
31254 if (expand_vec_perm_blend (d))
31257 /* Try one of the AVX vpermil variable permutations. */
31258 if (expand_vec_perm_vpermil (d))
31261 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31262 if (expand_vec_perm_pshufb (d))
31268 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31269 in terms of a pair of pshuflw + pshufhw instructions. */
31272 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31274 unsigned char perm2[MAX_VECT_LEN];
31278 if (d->vmode != V8HImode || d->op0 != d->op1)
31281 /* The two permutations only operate in 64-bit lanes. */
31282 for (i = 0; i < 4; ++i)
31283 if (d->perm[i] >= 4)
31285 for (i = 4; i < 8; ++i)
31286 if (d->perm[i] < 4)
31292 /* Emit the pshuflw. */
31293 memcpy (perm2, d->perm, 4);
31294 for (i = 4; i < 8; ++i)
31296 ok = expand_vselect (d->target, d->op0, perm2, 8);
31299 /* Emit the pshufhw. */
31300 memcpy (perm2 + 4, d->perm + 4, 4);
31301 for (i = 0; i < 4; ++i)
31303 ok = expand_vselect (d->target, d->target, perm2, 8);
31309 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31310 the permutation using the SSSE3 palignr instruction. This succeeds
31311 when all of the elements in PERM fit within one vector and we merely
31312 need to shift them down so that a single vector permutation has a
31313 chance to succeed. */
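/* For example, the V16QI permutation { 5, 6, ..., 20 } draws all of its
   bytes from the 16-byte window starting at byte 5 of the op1:op0
   concatenation, so a palignr by 5 bytes reduces it to the identity
   permutation on a single vector.  */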
31316 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31318 unsigned i, nelt = d->nelt;
31323 /* Even with AVX, palignr only operates on 128-bit vectors. */
31324 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31327 min = nelt, max = 0;
31328 for (i = 0; i < nelt; ++i)
31330 unsigned e = d->perm[i];
31336 if (min == 0 || max - min >= nelt)
31339 /* Given that we have SSSE3, we know we'll be able to implement the
31340 single operand permutation after the palignr with pshufb. */
31344 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31345 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31346 gen_lowpart (TImode, d->op1),
31347 gen_lowpart (TImode, d->op0), shift));
31349 d->op0 = d->op1 = d->target;
31352 for (i = 0; i < nelt; ++i)
31354 unsigned e = d->perm[i] - min;
31360 /* Test for the degenerate case where the alignment by itself
31361 produces the desired permutation. */
31365 ok = expand_vec_perm_1 (d);
31371 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31372 a two vector permutation into a single vector permutation by using
31373 an interleave operation to merge the vectors. */
31376 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31378 struct expand_vec_perm_d dremap, dfinal;
31379 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31380 unsigned contents, h1, h2, h3, h4;
31381 unsigned char remap[2 * MAX_VECT_LEN];
31385 if (d->op0 == d->op1)
31388 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31389 lanes. We can use similar techniques with the vperm2f128 instruction,
31390 but it requires slightly different logic. */
31391 if (GET_MODE_SIZE (d->vmode) != 16)
31394 /* Examine from whence the elements come. */
31396 for (i = 0; i < nelt; ++i)
31397 contents |= 1u << d->perm[i];
31399 /* Split the two input vectors into 4 halves. */
31400 h1 = (1u << nelt2) - 1;
31405 memset (remap, 0xff, sizeof (remap));
  /* If the elements all come from the low halves, use interleave low;
     similarly, use interleave high when they all come from the high
     halves.  If the elements are from mis-matched halves, we can use
     shufps for V4SF/V4SI or do a DImode shuffle.  */
31411 if ((contents & (h1 | h3)) == contents)
31413 for (i = 0; i < nelt2; ++i)
31416 remap[i + nelt] = i * 2 + 1;
31417 dremap.perm[i * 2] = i;
31418 dremap.perm[i * 2 + 1] = i + nelt;
31421 else if ((contents & (h2 | h4)) == contents)
31423 for (i = 0; i < nelt2; ++i)
31425 remap[i + nelt2] = i * 2;
31426 remap[i + nelt + nelt2] = i * 2 + 1;
31427 dremap.perm[i * 2] = i + nelt2;
31428 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31431 else if ((contents & (h1 | h4)) == contents)
31433 for (i = 0; i < nelt2; ++i)
31436 remap[i + nelt + nelt2] = i + nelt2;
31437 dremap.perm[i] = i;
31438 dremap.perm[i + nelt2] = i + nelt + nelt2;
31442 dremap.vmode = V2DImode;
31444 dremap.perm[0] = 0;
31445 dremap.perm[1] = 3;
31448 else if ((contents & (h2 | h3)) == contents)
31450 for (i = 0; i < nelt2; ++i)
31452 remap[i + nelt2] = i;
31453 remap[i + nelt] = i + nelt2;
31454 dremap.perm[i] = i + nelt2;
31455 dremap.perm[i + nelt2] = i + nelt;
31459 dremap.vmode = V2DImode;
31461 dremap.perm[0] = 1;
31462 dremap.perm[1] = 2;
31468 /* Use the remapping array set up above to move the elements from their
31469 swizzled locations into their final destinations. */
31471 for (i = 0; i < nelt; ++i)
31473 unsigned e = remap[d->perm[i]];
31474 gcc_assert (e < nelt);
31475 dfinal.perm[i] = e;
31477 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31478 dfinal.op1 = dfinal.op0;
31479 dremap.target = dfinal.op0;
31481 /* Test if the final remap can be done with a single insn. For V4SFmode or
31482 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31484 ok = expand_vec_perm_1 (&dfinal);
31485 seq = get_insns ();
31491 if (dremap.vmode != dfinal.vmode)
31493 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31494 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31495 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31498 ok = expand_vec_perm_1 (&dremap);
31505 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two-instruction sequences.  */
31510 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31512 rtx rperm[2][16], vperm, l, h, op, m128;
31513 unsigned int i, nelt, eltsz;
31515 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31517 gcc_assert (d->op0 != d->op1);
31520 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31522 /* Generate two permutation masks. If the required element is within
31523 the given vector it is shuffled into the proper lane. If the required
31524 element is in the other vector, force a zero into the lane by setting
31525 bit 7 in the permutation mask. */
31526 m128 = GEN_INT (-128);
31527 for (i = 0; i < nelt; ++i)
31529 unsigned j, e = d->perm[i];
31530 unsigned which = (e >= nelt);
31534 for (j = 0; j < eltsz; ++j)
31536 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31537 rperm[1-which][i*eltsz + j] = m128;
31541 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31542 vperm = force_reg (V16QImode, vperm);
31544 l = gen_reg_rtx (V16QImode);
31545 op = gen_lowpart (V16QImode, d->op0);
31546 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31548 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31549 vperm = force_reg (V16QImode, vperm);
31551 h = gen_reg_rtx (V16QImode);
31552 op = gen_lowpart (V16QImode, d->op1);
31553 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31555 op = gen_lowpart (V16QImode, d->target);
31556 emit_insn (gen_iorv16qi3 (op, l, h));
31561 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31562 and extract-odd permutations. */
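/* For example, with V4SI operands { a0 a1 a2 a3 } and { b0 b1 b2 b3 },
   extract-even (perm = { 0 2 4 6 }) produces { a0 a2 b0 b2 } and
   extract-odd (perm = { 1 3 5 7 }) produces { a1 a3 b1 b3 }.  */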
31565 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31567 rtx t1, t2, t3, t4;
31572 t1 = gen_reg_rtx (V4DFmode);
31573 t2 = gen_reg_rtx (V4DFmode);
31575 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31576 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31577 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31579 /* Now an unpck[lh]pd will produce the result required. */
31581 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31583 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31589 static const unsigned char perm1[8] = { 0, 2, 1, 3, 5, 6, 5, 7 };
31590 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
31591 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
31593 t1 = gen_reg_rtx (V8SFmode);
31594 t2 = gen_reg_rtx (V8SFmode);
31595 t3 = gen_reg_rtx (V8SFmode);
31596 t4 = gen_reg_rtx (V8SFmode);
31598 /* Shuffle within the 128-bit lanes to produce:
31599 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
31600 expand_vselect (t1, d->op0, perm1, 8);
31601 expand_vselect (t2, d->op1, perm1, 8);
31603 /* Shuffle the lanes around to produce:
31604 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
31605 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
31606 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
31608 /* Now a vpermil2p will produce the result required. */
31609 /* ??? The vpermil2p requires a vector constant. Another option
     is an unpck[lh]ps to merge the two vectors to produce
31611 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
31612 vpermilps to get the elements into the final order. */
31615 memcpy (d->perm, odd ? permo: perme, 8);
31616 expand_vec_perm_vpermil (d);
31624 /* These are always directly implementable by expand_vec_perm_1. */
31625 gcc_unreachable ();
31629 return expand_vec_perm_pshufb2 (d);
31632 /* We need 2*log2(N)-1 operations to achieve odd/even
31633 with interleave. */
31634 t1 = gen_reg_rtx (V8HImode);
31635 t2 = gen_reg_rtx (V8HImode);
31636 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31637 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31638 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31639 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31641 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31643 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31650 return expand_vec_perm_pshufb2 (d);
31653 t1 = gen_reg_rtx (V16QImode);
31654 t2 = gen_reg_rtx (V16QImode);
31655 t3 = gen_reg_rtx (V16QImode);
31656 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31657 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31658 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31659 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31660 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31661 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31663 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31665 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31671 gcc_unreachable ();
31677 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31678 extract-even and extract-odd permutations. */
31681 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31683 unsigned i, odd, nelt = d->nelt;
31686 if (odd != 0 && odd != 1)
31689 for (i = 1; i < nelt; ++i)
31690 if (d->perm[i] != 2 * i + odd)
31693 return expand_vec_perm_even_odd_1 (d, odd);
31696 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31697 permutations. We assume that expand_vec_perm_1 has already failed. */
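/* The interleave path below works because interleaving a vector with
   itself doubles the element size while keeping a copy of the chosen
   element: e.g. broadcasting byte 5 of a V16QI becomes broadcasting
   HImode element 5, then SImode element 1, at which point a single
   pshufd can finish the job.  */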
31700 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31702 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31703 enum machine_mode vmode = d->vmode;
31704 unsigned char perm2[4];
31712 /* These are special-cased in sse.md so that we can optionally
31713 use the vbroadcast instruction. They expand to two insns
31714 if the input happens to be in a register. */
31715 gcc_unreachable ();
31721 /* These are always implementable using standard shuffle patterns. */
31722 gcc_unreachable ();
31726 /* These can be implemented via interleave. We save one insn by
31727 stopping once we have promoted to V4SImode and then use pshufd. */
31730 optab otab = vec_interleave_low_optab;
31734 otab = vec_interleave_high_optab;
31739 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31740 vmode = get_mode_wider_vector (vmode);
31741 op0 = gen_lowpart (vmode, op0);
31743 while (vmode != V4SImode);
31745 memset (perm2, elt, 4);
31746 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31751 gcc_unreachable ();
31755 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31756 broadcast permutations. */
31759 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31761 unsigned i, elt, nelt = d->nelt;
31763 if (d->op0 != d->op1)
31767 for (i = 1; i < nelt; ++i)
31768 if (d->perm[i] != elt)
31771 return expand_vec_perm_broadcast_1 (d);
31774 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31775 With all of the interface bits taken care of, perform the expansion
31776 in D and return true on success. */
31779 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31781 /* Try a single instruction expansion. */
31782 if (expand_vec_perm_1 (d))
31785 /* Try sequences of two instructions. */
31787 if (expand_vec_perm_pshuflw_pshufhw (d))
31790 if (expand_vec_perm_palignr (d))
31793 if (expand_vec_perm_interleave2 (d))
31796 if (expand_vec_perm_broadcast (d))
31799 /* Try sequences of three instructions. */
31801 if (expand_vec_perm_pshufb2 (d))
31804 /* ??? Look for narrow permutations whose element orderings would
31805 allow the promotion to a wider mode. */
31807 /* ??? Look for sequences of interleave or a wider permute that place
31808 the data into the correct lanes for a half-vector shuffle like
31809 pshuf[lh]w or vpermilps. */
31811 /* ??? Look for sequences of interleave that produce the desired results.
31812 The combinatorics of punpck[lh] get pretty ugly... */
31814 if (expand_vec_perm_even_odd (d))
31820 /* Extract the values from the vector CST into the permutation array in D.
31821 Return 0 on error, 1 if all values from the permutation come from the
31822 first vector, 2 if all values from the second vector, and 3 otherwise. */
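/* For example, for V4SI the constant { 0, 1, 2, 3 } returns 1,
   { 4, 5, 6, 7 } returns 2 (and is folded back to { 0, 1, 2, 3 }),
   and { 0, 5, 2, 7 } returns 3.  */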
31825 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31827 tree list = TREE_VECTOR_CST_ELTS (cst);
31828 unsigned i, nelt = d->nelt;
31831 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31833 unsigned HOST_WIDE_INT e;
31835 if (!host_integerp (TREE_VALUE (list), 1))
31837 e = tree_low_cst (TREE_VALUE (list), 1);
31841 ret |= (e < nelt ? 1 : 2);
31844 gcc_assert (list == NULL);
31846 /* For all elements from second vector, fold the elements to first. */
31848 for (i = 0; i < nelt; ++i)
31849 d->perm[i] -= nelt;
31855 ix86_expand_vec_perm_builtin (tree exp)
31857 struct expand_vec_perm_d d;
31858 tree arg0, arg1, arg2;
31860 arg0 = CALL_EXPR_ARG (exp, 0);
31861 arg1 = CALL_EXPR_ARG (exp, 1);
31862 arg2 = CALL_EXPR_ARG (exp, 2);
31864 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31865 d.nelt = GET_MODE_NUNITS (d.vmode);
31866 d.testing_p = false;
31867 gcc_assert (VECTOR_MODE_P (d.vmode));
31869 if (TREE_CODE (arg2) != VECTOR_CST)
31871 error_at (EXPR_LOCATION (exp),
31872 "vector permutation requires vector constant");
31876 switch (extract_vec_perm_cst (&d, arg2))
31882 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31886 if (!operand_equal_p (arg0, arg1, 0))
31888 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31889 d.op0 = force_reg (d.vmode, d.op0);
31890 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31891 d.op1 = force_reg (d.vmode, d.op1);
31895 /* The elements of PERM do not suggest that only the first operand
31896 is used, but both operands are identical. Allow easier matching
     of the permutation by folding the permutation into the single input
     operand.  */
31900 unsigned i, nelt = d.nelt;
31901 for (i = 0; i < nelt; ++i)
31902 if (d.perm[i] >= nelt)
31908 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31909 d.op0 = force_reg (d.vmode, d.op0);
31914 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31915 d.op0 = force_reg (d.vmode, d.op0);
31920 d.target = gen_reg_rtx (d.vmode);
31921 if (ix86_expand_vec_perm_builtin_1 (&d))
  /* For compiler generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin the user has access to, don't abort.  */
31930 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31933 sorry ("vector permutation (%d %d %d %d)",
31934 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31937 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31938 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31939 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31942 sorry ("vector permutation "
31943 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31944 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31945 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31946 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31947 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31950 gcc_unreachable ();
31953 return CONST0_RTX (d.vmode);
31956 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31959 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31961 struct expand_vec_perm_d d;
31965 d.vmode = TYPE_MODE (vec_type);
31966 d.nelt = GET_MODE_NUNITS (d.vmode);
31967 d.testing_p = true;
31969 /* Given sufficient ISA support we can just return true here
31970 for selected vector modes. */
31971 if (GET_MODE_SIZE (d.vmode) == 16)
31973 /* All implementable with a single vpperm insn. */
31976 /* All implementable with 2 pshufb + 1 ior. */
31979 /* All implementable with shufpd or unpck[lh]pd. */
31984 vec_mask = extract_vec_perm_cst (&d, mask);
  /* This hook cannot be called in response to something that the
31987 user does (unlike the builtin expander) so we shouldn't ever see
31988 an error generated from the extract. */
31989 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31990 one_vec = (vec_mask != 3);
31992 /* Implementable with shufps or pshufd. */
31993 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31996 /* Otherwise we have to go through the motions and see if we can
31997 figure out how to generate the requested permutation. */
31998 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31999 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32001 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32004 ret = ix86_expand_vec_perm_builtin_1 (&d);
32011 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32013 struct expand_vec_perm_d d;
32019 d.vmode = GET_MODE (targ);
32020 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32021 d.testing_p = false;
32023 for (i = 0; i < nelt; ++i)
32024 d.perm[i] = i * 2 + odd;
32026 /* We'll either be able to implement the permutation directly... */
32027 if (expand_vec_perm_1 (&d))
32030 /* ... or we use the special-case patterns. */
32031 expand_vec_perm_even_odd_1 (&d, odd);
/* This function returns the calling-abi-specific va_list type node,
   i.e. the va_list type specific to FNDECL.  */
32038 ix86_fn_abi_va_list (tree fndecl)
32041 return va_list_type_node;
32042 gcc_assert (fndecl != NULL_TREE);
32044 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32045 return ms_va_list_type_node;
32047 return sysv_va_list_type_node;
32050 /* Returns the canonical va_list type specified by TYPE. If there
   is no valid TYPE provided, it returns NULL_TREE.  */
32054 ix86_canonical_va_list_type (tree type)
32058 /* Resolve references and pointers to va_list type. */
32059 if (TREE_CODE (type) == MEM_REF)
32060 type = TREE_TYPE (type);
32061 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32062 type = TREE_TYPE (type);
32063 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32064 type = TREE_TYPE (type);
32068 wtype = va_list_type_node;
32069 gcc_assert (wtype != NULL_TREE);
32071 if (TREE_CODE (wtype) == ARRAY_TYPE)
32073 /* If va_list is an array type, the argument may have decayed
32074 to a pointer type, e.g. by being passed to another function.
32075 In that case, unwrap both types so that we can compare the
32076 underlying records. */
32077 if (TREE_CODE (htype) == ARRAY_TYPE
32078 || POINTER_TYPE_P (htype))
32080 wtype = TREE_TYPE (wtype);
32081 htype = TREE_TYPE (htype);
32084 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32085 return va_list_type_node;
32086 wtype = sysv_va_list_type_node;
32087 gcc_assert (wtype != NULL_TREE);
32089 if (TREE_CODE (wtype) == ARRAY_TYPE)
32091 /* If va_list is an array type, the argument may have decayed
32092 to a pointer type, e.g. by being passed to another function.
32093 In that case, unwrap both types so that we can compare the
32094 underlying records. */
32095 if (TREE_CODE (htype) == ARRAY_TYPE
32096 || POINTER_TYPE_P (htype))
32098 wtype = TREE_TYPE (wtype);
32099 htype = TREE_TYPE (htype);
32102 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32103 return sysv_va_list_type_node;
32104 wtype = ms_va_list_type_node;
32105 gcc_assert (wtype != NULL_TREE);
32107 if (TREE_CODE (wtype) == ARRAY_TYPE)
32109 /* If va_list is an array type, the argument may have decayed
32110 to a pointer type, e.g. by being passed to another function.
32111 In that case, unwrap both types so that we can compare the
32112 underlying records. */
32113 if (TREE_CODE (htype) == ARRAY_TYPE
32114 || POINTER_TYPE_P (htype))
32116 wtype = TREE_TYPE (wtype);
32117 htype = TREE_TYPE (htype);
32120 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32121 return ms_va_list_type_node;
32124 return std_canonical_va_list_type (type);
32127 /* Iterate through the target-specific builtin types for va_list.
32128 IDX denotes the iterator, *PTREE is set to the result type of
32129 the va_list builtin, and *PNAME to its internal type.
32130 Returns zero if there is no element for this index, otherwise
32131 IDX should be increased upon the next call.
32132 Note, do not iterate a base builtin's name like __builtin_va_list.
32133 Used from c_common_nodes_and_builtins. */
32136 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32146 *ptree = ms_va_list_type_node;
32147 *pname = "__builtin_ms_va_list";
32151 *ptree = sysv_va_list_type_node;
32152 *pname = "__builtin_sysv_va_list";
32160 #undef TARGET_SCHED_DISPATCH
32161 #define TARGET_SCHED_DISPATCH has_dispatch
32162 #undef TARGET_SCHED_DISPATCH_DO
32163 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32165 /* The size of the dispatch window is the total number of bytes of
32166 object code allowed in a window. */
32167 #define DISPATCH_WINDOW_SIZE 16
32169 /* Number of dispatch windows considered for scheduling. */
32170 #define MAX_DISPATCH_WINDOWS 3
32172 /* Maximum number of instructions in a window. */
32175 /* Maximum number of immediate operands in a window. */
32178 /* Maximum number of immediate bits allowed in a window. */
32179 #define MAX_IMM_SIZE 128
32181 /* Maximum number of 32 bit immediates allowed in a window. */
32182 #define MAX_IMM_32 4
32184 /* Maximum number of 64 bit immediates allowed in a window. */
32185 #define MAX_IMM_64 2
32187 /* Maximum total of loads or prefetches allowed in a window. */
32190 /* Maximum total of stores allowed in a window. */
32191 #define MAX_STORE 1
/* Dispatch groups.  Instruction classes that affect the mix in a
   dispatch window.  */
32198 enum dispatch_group {
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number, because
   the number of these kinds of operations does not have any effect in a
   dispatch window, but we need them for other reasons in a window.  */
32218 static unsigned int num_allowable_groups[disp_last] = {
32219 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32222 char group_name[disp_last + 1][16] = {
32223 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32224 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32225 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
/* Instruction path.  */
enum insn_path {
  no_path,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi   /* Instructions with more than 2 micro ops.  */
};
32237 /* sched_insn_info defines a window to the instructions scheduled in
32238 the basic block. It contains a pointer to the insn_info table and
32239 the instruction scheduled.
   Windows are allocated for each basic block and are linked with
   each other.  */
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;
32251 /* Linked list of dispatch windows. This is a two way list of
32252 dispatch windows of a basic block. It contains information about
32253 the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
32256 typedef struct dispatch_windows_s {
32257 int num_insn; /* Number of insn in the window. */
32258 int num_uops; /* Number of uops in the window. */
32259 int window_size; /* Number of bytes in the window. */
  int window_num;   /* Window number, either 0 or 1.  */
  int num_imm;      /* Number of immediates in the window.  */
  int num_imm_32;   /* Number of 32 bit immediates in the window.  */
  int num_imm_64;   /* Number of 64 bit immediates in the window.  */
  int imm_size;     /* Total size in bytes of immediates in the window.  */
32265 int num_loads; /* Total memory loads in the window. */
32266 int num_stores; /* Total memory stores in the window. */
32267 int violation; /* Violation exists in window. */
32268 sched_insn_info *window; /* Pointer to the window. */
32269 struct dispatch_windows_s *next;
32270 struct dispatch_windows_s *prev;
32271 } dispatch_windows;
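/* Sketch of the structure built from this type: at most two software
   windows, kept in the statics declared below and chained through the
   prev/next fields:

     dispatch_window_list   (window_num 0)
        ^ prev    | next
        |         v
     dispatch_window_list1  (window_num 1)

   Together they may hold up to 48 bytes of object code (see the
   asserts in process_end_window).  */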
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;
32281 static dispatch_windows *dispatch_window_list;
32282 static dispatch_windows *dispatch_window_list1;
32284 /* Get dispatch group of insn. */
static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;
32291 if (INSN_CODE (insn) < 0)
32292 return disp_no_group;
32293 memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;
32313 type = get_attr_type (insn);
32314 return (type == TYPE_TEST
32315 || type == TYPE_ICMP
32316 || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
32325 if (dispatch_window_list->next)
32326 return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}
/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;
  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;
32360 new_list->num_insn = 0;
32361 new_list->num_uops = 0;
32362 new_list->window_size = 0;
32363 new_list->next = NULL;
32364 new_list->prev = NULL;
32365 new_list->window_num = window_num;
32366 new_list->num_imm = 0;
32367 new_list->num_imm_32 = 0;
32368 new_list->num_imm_64 = 0;
32369 new_list->imm_size = 0;
32370 new_list->num_loads = 0;
32371 new_list->num_stores = 0;
32372 new_list->violation = false;
  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
32385 /* This function allocates and initializes a dispatch window and the
32386 list container holding a pointer to the window. */
static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
32397 /* This routine initializes the dispatch scheduling information. It
32398 initiates building dispatch scheduler tables and constructs the
32399 first dispatch window. */
static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();

  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   basic block.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
32437 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32438 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose size is DISPATCH_WINDOW_SIZE.  */
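/* Worked numbers: MAX_DISPATCH_WINDOWS (3) hardware windows of
   DISPATCH_WINDOW_SIZE (16) bytes each give the 48-byte budget that
   the two software windows share.  */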
static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);

      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
32518 imm_info imm_values = {0, 0, 0};
32520 find_constant (insn, &imm_values);
32521 *imm = imm_values.imm;
32522 *imm32 = imm_values.imm32;
32523 *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
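/* Worked example: an insn carrying one 32-bit and one 64-bit immediate
   comes back with *imm = 2, *imm32 = 1, *imm64 = 1 and a returned size
   of 1 * 4 + 1 * 8 = 12 bytes.  */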
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
32543 /* Return single or double path for instructions. */
32545 static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
32559 /* Return insn dispatch group. */
static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
32583 /* Count number of GROUP restricted instructions in a dispatch
32584 window WINDOW_LIST. */
static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;
  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
32600 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32601 &num_imm64_operand);
32602 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32603 || num_imm_operand + window_list->num_imm > MAX_IMM
32604 || (num_imm32_operand > 0
32605 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32606 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32607 || (num_imm64_operand > 0
32608 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32609 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32610 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32611 && num_imm64_operand > 0
32612 && ((window_list->num_imm_64 > 0
32613 && window_list->num_insn >= 2)
              || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }
32620 if ((group == disp_load_store
32621 && (window_list->num_loads >= MAX_LOAD
32622 || window_list->num_stores >= MAX_STORE))
32623 || ((group == disp_load
32624 || group == disp_prefetch)
32625 && window_list->num_loads >= MAX_LOAD)
32626 || (group == disp_store
      && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
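/* Example of the returned counts: a store insn entering a window that
   already holds MAX_STORE stores yields BIG, and since BIG exceeds
   num_allowable_groups[disp_store], fits_dispatch_window below rejects
   the insn; an unrestricted insn yields 1.  */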
32633 /* This function returns true if insn satisfies dispatch rules on the
32634 last window scheduled. */
static bool
fits_dispatch_window (rtx insn)
{
32639 dispatch_windows *window_list = dispatch_window_list;
32640 dispatch_windows *window_list_next = dispatch_window_list->next;
32641 unsigned int num_restrict;
32642 enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;
32646 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32647 instructions should be given the lowest priority in the
     scheduling process in the Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;
32653 /* Check nonrestricted. */
  if (group == disp_no_group || group == disp_branch)
    return true;
32657 /* Get last dispatch window. */
32658 if (window_list_next)
32659 window_list = window_list_next;
  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }
32671 num_restrict = count_num_restricted (insn, window_list);
  if (num_restrict > num_allowable_groups[group])
    return false;
32676 /* See if it fits in the first window. */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }

  return true;
}
32690 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32691 dispatch window WINDOW_LIST. */
static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
32700 enum dispatch_group group = get_insn_group (insn);
32701 enum insn_path path = get_insn_path (insn);
32702 int num_imm_operand;
32703 int num_imm32_operand;
32704 int num_imm64_operand;
32706 if (!window_list->violation && group != disp_cmp
32707 && !fits_dispatch_window (insn))
32708 window_list->violation = true;
32710 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32711 &num_imm64_operand);
32713 /* Initialize window with new instruction. */
32714 window[num_insn].insn = insn;
32715 window[num_insn].byte_len = byte_len;
32716 window[num_insn].group = group;
32717 window[num_insn].path = path;
32718 window[num_insn].imm_bytes = imm_size;
32720 window_list->window_size += byte_len;
32721 window_list->num_insn = num_insn + 1;
32722 window_list->num_uops = window_list->num_uops + num_uops;
32723 window_list->imm_size += imm_size;
32724 window_list->num_imm += num_imm_operand;
32725 window_list->num_imm_32 += num_imm32_operand;
32726 window_list->num_imm_64 += num_imm64_operand;
32728 if (group == disp_store)
32729 window_list->num_stores += 1;
32730 else if (group == disp_load
32731 || group == disp_prefetch)
32732 window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
32740 /* Adds a scheduled instruction, INSN, to the current dispatch window.
32741 If the total bytes of instructions or the number of instructions in
   the window exceeds what is allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;
  bool insn_fits;
  if (INSN_CODE (insn) < 0)
    return;
32763 byte_len = min_insn_size (insn);
32764 window_list = dispatch_window_list;
32765 next_list = window_list->next;
32766 path = get_insn_path (insn);
32767 insn_group = get_insn_group (insn);
  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;
  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;
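/* With the insn_path enum above (no_path = 0, path_single = 1,
   path_double = 2, path_multi = 3), the fallthrough assignment charges
   a multi-path insn (int) path_multi == 3 uops.  */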
32780 /* If current window is full, get a new window.
32781 Window number zero is full, if MAX_INSN uops are scheduled in it.
32782 Window number one is full, if window zero's bytes plus window
32783 one's bytes is 32, or if the bytes of the new instruction added
32784 to the total makes it greater than 48, or it has already MAX_INSN
32785 instructions in it. */
32786 num_insn = window_list->num_insn;
32787 num_uops = window_list->num_uops;
32788 window_num = window_list->window_num;
32789 insn_fits = fits_dispatch_window (insn);
  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      /* ~window_num & 1 toggles between the two window numbers.  */
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
32801 add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }
    }
  else if (window_num == 1)
    {
32811 window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();
  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block processing.  */
      process_end_window ();
      return;
    }
}
32833 /* Print the dispatch window, WINDOW_NUM, to FILE. */
32835 DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;
32846 fprintf (file, "Window #%d:\n", list->window_num);
32847 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
32848 list->num_insn, list->num_uops, list->window_size);
32849 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32850 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
32854 fprintf (file, " insn info:\n");
  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
32869 /* Print to stdout a dispatch window. */
DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
32877 /* Print INSN dispatch information to FILE. */
32879 DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;
  if (INSN_CODE (insn) < 0)
    return;
32893 byte_len = min_insn_size (insn);
32894 path = get_insn_path (insn);
32895 group = get_insn_group (insn);
32896 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32897 &num_imm64_operand);
32899 fprintf (file, " insn info:\n");
32900 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
32901 group_name[group], path, byte_len);
32902 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */
32909 DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();
32915 fprintf (stdout, "Number of ready: %d\n", no_ready);
32917 for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
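/* Hedged sketch (the real caller lives in the scheduler, not in this
   file): the two hooks installed above are expected to be driven
   roughly as

     if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON))
       {
         targetm.sched.dispatch_do (NULL_RTX, DISPATCH_INIT);
         ...
         targetm.sched.dispatch_do (insn, ADD_TO_DISPATCH_WINDOW);
       }

   where the field names follow from TARGET_SCHED_DISPATCH and
   TARGET_SCHED_DISPATCH_DO; the exact call sites are assumptions
   here.  */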
32960 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
32961 place emms and femms instructions. */
static unsigned int
ix86_units_per_simd_word (enum machine_mode mode)
{
  /* Disable double precision vectorizer if needed.  */
  if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
    return UNITS_PER_WORD;
  /* FIXME: AVX has 32byte floating point vector operations and 16byte
     integer vector operations.  But the vectorizer doesn't support
     different sizes for integer and floating point vectors.  We limit
     the vector size to 16byte.  */
  if (TARGET_AVX)
    return (mode == DFmode || mode == SFmode) ? 32 : 16;
  else
    return TARGET_SSE ? 16 : UNITS_PER_WORD;
}
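/* Resulting vector widths, for example: with SSE enabled the hook
   returns 16 bytes (e.g. V4SF); with AVX it returns 32 bytes for
   SFmode and DFmode but still 16 for integer modes; without SSE it
   falls back to UNITS_PER_WORD.  */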
32982 /* Initialize the GCC target structure. */
32983 #undef TARGET_RETURN_IN_MEMORY
32984 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
32986 #undef TARGET_LEGITIMIZE_ADDRESS
32987 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
32989 #undef TARGET_ATTRIBUTE_TABLE
32990 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
32991 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32992 # undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
32996 #undef TARGET_COMP_TYPE_ATTRIBUTES
32997 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
32999 #undef TARGET_INIT_BUILTINS
33000 #define TARGET_INIT_BUILTINS ix86_init_builtins
33001 #undef TARGET_BUILTIN_DECL
33002 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33003 #undef TARGET_EXPAND_BUILTIN
33004 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33006 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33007 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33008 ix86_builtin_vectorized_function
33010 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33011 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33013 #undef TARGET_BUILTIN_RECIPROCAL
33014 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33016 #undef TARGET_ASM_FUNCTION_EPILOGUE
33017 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33019 #undef TARGET_ENCODE_SECTION_INFO
33020 #ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
33026 #undef TARGET_ASM_OPEN_PAREN
33027 #define TARGET_ASM_OPEN_PAREN ""
33028 #undef TARGET_ASM_CLOSE_PAREN
33029 #define TARGET_ASM_CLOSE_PAREN ""
33031 #undef TARGET_ASM_BYTE_OP
33032 #define TARGET_ASM_BYTE_OP ASM_BYTE
33034 #undef TARGET_ASM_ALIGNED_HI_OP
33035 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33036 #undef TARGET_ASM_ALIGNED_SI_OP
33037 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
33043 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33044 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33046 #undef TARGET_ASM_UNALIGNED_HI_OP
33047 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33048 #undef TARGET_ASM_UNALIGNED_SI_OP
33049 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33050 #undef TARGET_ASM_UNALIGNED_DI_OP
33051 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33053 #undef TARGET_PRINT_OPERAND
33054 #define TARGET_PRINT_OPERAND ix86_print_operand
33055 #undef TARGET_PRINT_OPERAND_ADDRESS
33056 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33057 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33058 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33059 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33060 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33062 #undef TARGET_SCHED_ADJUST_COST
33063 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33064 #undef TARGET_SCHED_ISSUE_RATE
33065 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33066 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33067 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33068 ia32_multipass_dfa_lookahead
33070 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33071 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
33077 #undef TARGET_CANNOT_FORCE_CONST_MEM
33078 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33079 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33080 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33082 #undef TARGET_DELEGITIMIZE_ADDRESS
33083 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33085 #undef TARGET_MS_BITFIELD_LAYOUT_P
33086 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
33092 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33093 #undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
33097 #undef TARGET_ASM_OUTPUT_MI_THUNK
33098 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33099 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33100 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33102 #undef TARGET_ASM_FILE_START
33103 #define TARGET_ASM_FILE_START x86_file_start
33105 #undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
33112 #undef TARGET_HANDLE_OPTION
33113 #define TARGET_HANDLE_OPTION ix86_handle_option
33115 #undef TARGET_OPTION_OVERRIDE
33116 #define TARGET_OPTION_OVERRIDE ix86_option_override
33117 #undef TARGET_OPTION_OPTIMIZATION
33118 #define TARGET_OPTION_OPTIMIZATION ix86_option_optimization
33120 #undef TARGET_REGISTER_MOVE_COST
33121 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33122 #undef TARGET_MEMORY_MOVE_COST
33123 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33124 #undef TARGET_RTX_COSTS
33125 #define TARGET_RTX_COSTS ix86_rtx_costs
33126 #undef TARGET_ADDRESS_COST
33127 #define TARGET_ADDRESS_COST ix86_address_cost
33129 #undef TARGET_FIXED_CONDITION_CODE_REGS
33130 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33131 #undef TARGET_CC_MODES_COMPATIBLE
33132 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33134 #undef TARGET_MACHINE_DEPENDENT_REORG
33135 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33137 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33138 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33140 #undef TARGET_BUILD_BUILTIN_VA_LIST
33141 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33143 #undef TARGET_ENUM_VA_LIST_P
33144 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33146 #undef TARGET_FN_ABI_VA_LIST
33147 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33149 #undef TARGET_CANONICAL_VA_LIST_TYPE
33150 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33152 #undef TARGET_EXPAND_BUILTIN_VA_START
33153 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33155 #undef TARGET_MD_ASM_CLOBBERS
33156 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33158 #undef TARGET_PROMOTE_PROTOTYPES
33159 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33160 #undef TARGET_STRUCT_VALUE_RTX
33161 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33162 #undef TARGET_SETUP_INCOMING_VARARGS
33163 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33164 #undef TARGET_MUST_PASS_IN_STACK
33165 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33166 #undef TARGET_FUNCTION_ARG_ADVANCE
33167 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33168 #undef TARGET_FUNCTION_ARG
33169 #define TARGET_FUNCTION_ARG ix86_function_arg
33170 #undef TARGET_PASS_BY_REFERENCE
33171 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33172 #undef TARGET_INTERNAL_ARG_POINTER
33173 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33174 #undef TARGET_UPDATE_STACK_BOUNDARY
33175 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33176 #undef TARGET_GET_DRAP_RTX
33177 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33178 #undef TARGET_STRICT_ARGUMENT_NAMING
33179 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33180 #undef TARGET_STATIC_CHAIN
33181 #define TARGET_STATIC_CHAIN ix86_static_chain
33182 #undef TARGET_TRAMPOLINE_INIT
33183 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33184 #undef TARGET_RETURN_POPS_ARGS
33185 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33187 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33188 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33190 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33191 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33193 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33194 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33196 #undef TARGET_C_MODE_FOR_SUFFIX
33197 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
33204 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33205 #undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
33209 #undef TARGET_MANGLE_TYPE
33210 #define TARGET_MANGLE_TYPE ix86_mangle_type
33212 #undef TARGET_STACK_PROTECT_FAIL
33213 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33215 #undef TARGET_SUPPORTS_SPLIT_STACK
33216 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33218 #undef TARGET_FUNCTION_VALUE
33219 #define TARGET_FUNCTION_VALUE ix86_function_value
33221 #undef TARGET_FUNCTION_VALUE_REGNO_P
33222 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33224 #undef TARGET_SECONDARY_RELOAD
33225 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33227 #undef TARGET_CLASS_LIKELY_SPILLED_P
33228 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33230 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33231 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33232 ix86_builtin_vectorization_cost
33233 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33234 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33235 ix86_vectorize_builtin_vec_perm
33236 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33237 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33238 ix86_vectorize_builtin_vec_perm_ok
33239 #undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
33240 #define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD \
33241 ix86_units_per_simd_word
33243 #undef TARGET_SET_CURRENT_FUNCTION
33244 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33246 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33247 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33249 #undef TARGET_OPTION_SAVE
33250 #define TARGET_OPTION_SAVE ix86_function_specific_save
33252 #undef TARGET_OPTION_RESTORE
33253 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33255 #undef TARGET_OPTION_PRINT
33256 #define TARGET_OPTION_PRINT ix86_function_specific_print
33258 #undef TARGET_CAN_INLINE_P
33259 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33261 #undef TARGET_EXPAND_TO_RTL_HOOK
33262 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33264 #undef TARGET_LEGITIMATE_ADDRESS_P
33265 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33267 #undef TARGET_IRA_COVER_CLASSES
33268 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33270 #undef TARGET_FRAME_POINTER_REQUIRED
33271 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33273 #undef TARGET_CAN_ELIMINATE
33274 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33276 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33277 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33279 #undef TARGET_ASM_CODE_END
33280 #define TARGET_ASM_CODE_END ix86_code_end
33282 struct gcc_target targetm = TARGET_INITIALIZER;
33284 #include "gt-i386.h"