/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
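/* An illustrative usage sketch (not from the original file; mult_init is
   the conventional processor_costs field name for the per-mode multiply
   cost array assumed here):

     cost = ix86_cost->mult_init[MODE_INDEX (mode)];

   MODE_INDEX (SImode) == 2 selects the SImode entry, and any mode other
   than QI/HI/SI/DImode falls through to the "other" slot at index 4.  */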
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
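/* A note on reading the stringop descriptors in the cost tables below
   (an editorial gloss, not part of the original sources): each {max, alg}
   pair means "use ALG for blocks of up to MAX bytes", with max == -1
   terminating the list; the leading algorithm is used when the block size
   is unknown at compile time.  For example,

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   says: unknown sizes go through a libcall, known sizes up to 256 bytes
   use rep movsl, and anything larger falls back to a libcall.  */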
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),           /* cost of an add instruction */
  COSTS_N_BYTES (3),           /* cost of a lea instruction */
  COSTS_N_BYTES (2),           /* variable shift costs */
  COSTS_N_BYTES (3),           /* constant shift costs */
  {COSTS_N_BYTES (3),          /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),          /* HI */
   COSTS_N_BYTES (3),          /* SI */
   COSTS_N_BYTES (3),          /* DI */
   COSTS_N_BYTES (5)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),          /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),          /* HI */
   COSTS_N_BYTES (3),          /* SI */
   COSTS_N_BYTES (3),          /* DI */
   COSTS_N_BYTES (5)},         /* other */
  COSTS_N_BYTES (3),           /* cost of movsx */
  COSTS_N_BYTES (3),           /* cost of movzx */
  0,                           /* "large" insn */
  2,                           /* MOVE_RATIO */
  2,                           /* cost for loading QImode using movzbl */
  {2, 2, 2},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {2, 2, 2},                   /* cost of storing integer registers */
  2,                           /* cost of reg,reg fld/fst */
  {2, 2, 2},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {2, 2, 2},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  3,                           /* cost of moving MMX register */
  {3, 3},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {3, 3},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  3,                           /* cost of moving SSE register */
  {3, 3, 3},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {3, 3, 3},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  3,                           /* MMX or SSE register to integer */
  0,                           /* size of l1 cache */
  0,                           /* size of l2 cache */
  0,                           /* size of prefetch block */
  0,                           /* number of parallel prefetches */
  2,                           /* Branch cost */
  COSTS_N_BYTES (2),           /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),           /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),           /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),           /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),           /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),           /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  1,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  1,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1),           /* cost of a lea instruction */
  COSTS_N_INSNS (3),           /* variable shift costs */
  COSTS_N_INSNS (2),           /* constant shift costs */
  {COSTS_N_INSNS (6),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),          /* HI */
   COSTS_N_INSNS (6),          /* SI */
   COSTS_N_INSNS (6),          /* DI */
   COSTS_N_INSNS (6)},         /* other */
  COSTS_N_INSNS (1),           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),         /* HI */
   COSTS_N_INSNS (23),         /* SI */
   COSTS_N_INSNS (23),         /* DI */
   COSTS_N_INSNS (23)},        /* other */
  COSTS_N_INSNS (3),           /* cost of movsx */
  COSTS_N_INSNS (2),           /* cost of movzx */
  15,                          /* "large" insn */
  3,                           /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {2, 4, 2},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {2, 4, 2},                   /* cost of storing integer registers */
  2,                           /* cost of reg,reg fld/fst */
  {8, 8, 8},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {8, 8, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {4, 8},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {4, 8},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {4, 8, 16},                  /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 8, 16},                  /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  3,                           /* MMX or SSE register to integer */
  0,                           /* size of l1 cache */
  0,                           /* size of l2 cache */
  0,                           /* size of prefetch block */
  0,                           /* number of parallel prefetches */
  1,                           /* Branch cost */
  COSTS_N_INSNS (23),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),         /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1),           /* cost of a lea instruction */
  COSTS_N_INSNS (3),           /* variable shift costs */
  COSTS_N_INSNS (2),           /* constant shift costs */
  {COSTS_N_INSNS (12),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),         /* HI */
   COSTS_N_INSNS (12),         /* SI */
   COSTS_N_INSNS (12),         /* DI */
   COSTS_N_INSNS (12)},        /* other */
  1,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),         /* HI */
   COSTS_N_INSNS (40),         /* SI */
   COSTS_N_INSNS (40),         /* DI */
   COSTS_N_INSNS (40)},        /* other */
  COSTS_N_INSNS (3),           /* cost of movsx */
  COSTS_N_INSNS (2),           /* cost of movzx */
  15,                          /* "large" insn */
  3,                           /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {2, 4, 2},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {2, 4, 2},                   /* cost of storing integer registers */
  2,                           /* cost of reg,reg fld/fst */
  {8, 8, 8},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {8, 8, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {4, 8},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {4, 8},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {4, 8, 16},                  /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 8, 16},                  /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  3,                           /* MMX or SSE register to integer */
  4,                           /* size of l1 cache.  486 has 8kB cache
                                  shared for code and data, so 4kB is
                                  not really precise.  */
  4,                           /* size of l2 cache */
  0,                           /* size of prefetch block */
  0,                           /* number of parallel prefetches */
  1,                           /* Branch cost */
  COSTS_N_INSNS (8),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),          /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1),           /* cost of a lea instruction */
  COSTS_N_INSNS (4),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (11),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),         /* HI */
   COSTS_N_INSNS (11),         /* SI */
   COSTS_N_INSNS (11),         /* DI */
   COSTS_N_INSNS (11)},        /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),         /* HI */
   COSTS_N_INSNS (25),         /* SI */
   COSTS_N_INSNS (25),         /* DI */
   COSTS_N_INSNS (25)},        /* other */
  COSTS_N_INSNS (3),           /* cost of movsx */
  COSTS_N_INSNS (2),           /* cost of movzx */
  8,                           /* "large" insn */
  6,                           /* MOVE_RATIO */
  6,                           /* cost for loading QImode using movzbl */
  {2, 4, 2},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {2, 4, 2},                   /* cost of storing integer registers */
  2,                           /* cost of reg,reg fld/fst */
  {2, 2, 6},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {4, 4, 6},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  8,                           /* cost of moving MMX register */
  {8, 8},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {8, 8},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {4, 8, 16},                  /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 8, 16},                  /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  3,                           /* MMX or SSE register to integer */
  8,                           /* size of l1 cache.  */
  8,                           /* size of l2 cache */
  0,                           /* size of prefetch block */
  0,                           /* number of parallel prefetches */
  2,                           /* Branch cost */
  COSTS_N_INSNS (3),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),          /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1),           /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (4),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (4),          /* SI */
   COSTS_N_INSNS (4),          /* DI */
   COSTS_N_INSNS (4)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),         /* HI */
   COSTS_N_INSNS (17),         /* SI */
   COSTS_N_INSNS (17),         /* DI */
   COSTS_N_INSNS (17)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  6,                           /* MOVE_RATIO */
  2,                           /* cost for loading QImode using movzbl */
  {4, 4, 4},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {2, 2, 2},                   /* cost of storing integer registers */
  2,                           /* cost of reg,reg fld/fst */
  {2, 2, 6},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {4, 4, 6},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {2, 2},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {2, 2},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {2, 2, 8},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {2, 2, 8},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  3,                           /* MMX or SSE register to integer */
  8,                           /* size of l1 cache.  */
  256,                         /* size of l2 cache */
  32,                          /* size of prefetch block */
  6,                           /* number of parallel prefetches */
  2,                           /* Branch cost */
  COSTS_N_INSNS (3),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),          /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1),           /* cost of a lea instruction */
  COSTS_N_INSNS (2),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (7),          /* SI */
   COSTS_N_INSNS (7),          /* DI */
   COSTS_N_INSNS (7)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),         /* HI */
   COSTS_N_INSNS (39),         /* SI */
   COSTS_N_INSNS (39),         /* DI */
   COSTS_N_INSNS (39)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  4,                           /* MOVE_RATIO */
  1,                           /* cost for loading QImode using movzbl */
  {1, 1, 1},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {1, 1, 1},                   /* cost of storing integer registers */
  1,                           /* cost of reg,reg fld/fst */
  {1, 1, 1},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {4, 6, 6},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */

  1,                           /* cost of moving MMX register */
  {1, 1},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {1, 1},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  1,                           /* cost of moving SSE register */
  {1, 1, 1},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {1, 1, 1},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  1,                           /* MMX or SSE register to integer */
  64,                          /* size of l1 cache.  */
  128,                         /* size of l2 cache.  */
  32,                          /* size of prefetch block */
  1,                           /* number of parallel prefetches */
  1,                           /* Branch cost */
  COSTS_N_INSNS (6),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),          /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (2),           /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (3),          /* DI */
   COSTS_N_INSNS (3)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),         /* HI */
   COSTS_N_INSNS (18),         /* SI */
   COSTS_N_INSNS (18),         /* DI */
   COSTS_N_INSNS (18)},        /* other */
  COSTS_N_INSNS (2),           /* cost of movsx */
  COSTS_N_INSNS (2),           /* cost of movzx */
  8,                           /* "large" insn */
  4,                           /* MOVE_RATIO */
  3,                           /* cost for loading QImode using movzbl */
  {4, 5, 4},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {2, 3, 2},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {6, 6, 6},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {4, 4, 4},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {2, 2},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {2, 2},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {2, 2, 8},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {2, 2, 8},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  6,                           /* MMX or SSE register to integer */
  32,                          /* size of l1 cache.  */
  32,                          /* size of l2 cache.  Some models
                                  have integrated l2 cache, but
                                  optimizing for k6 is not important
                                  enough to worry about that.  */
  32,                          /* size of prefetch block */
  1,                           /* number of parallel prefetches */
  1,                           /* Branch cost */
  COSTS_N_INSNS (2),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),          /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (2),           /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (5),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),          /* HI */
   COSTS_N_INSNS (5),          /* SI */
   COSTS_N_INSNS (5),          /* DI */
   COSTS_N_INSNS (5)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),         /* HI */
   COSTS_N_INSNS (42),         /* SI */
   COSTS_N_INSNS (74),         /* DI */
   COSTS_N_INSNS (74)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  9,                           /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {3, 4, 3},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {3, 4, 3},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {4, 4, 12},                  /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {6, 6, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {4, 4},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {4, 4},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {4, 4, 6},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 4, 5},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  5,                           /* MMX or SSE register to integer */
  64,                          /* size of l1 cache.  */
  256,                         /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  6,                           /* number of parallel prefetches */
  5,                           /* Branch cost */
  COSTS_N_INSNS (4),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),          /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (2),           /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (4),          /* DI */
   COSTS_N_INSNS (5)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),         /* HI */
   COSTS_N_INSNS (42),         /* SI */
   COSTS_N_INSNS (74),         /* DI */
   COSTS_N_INSNS (74)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  9,                           /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {3, 4, 3},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {3, 4, 3},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {4, 4, 12},                  /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {6, 6, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {3, 3},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {4, 4},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {4, 3, 6},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 4, 5},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  5,                           /* MMX or SSE register to integer */
  64,                          /* size of l1 cache.  */
  512,                         /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                         /* number of parallel prefetches */
  3,                           /* Branch cost */
  COSTS_N_INSNS (4),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),          /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                           /* scalar_stmt_cost.  */
  2,                           /* scalar load_cost.  */
  2,                           /* scalar_store_cost.  */
  5,                           /* vec_stmt_cost.  */
  0,                           /* vec_to_scalar_cost.  */
  2,                           /* scalar_to_vec_cost.  */
  2,                           /* vec_align_load_cost.  */
  3,                           /* vec_unalign_load_cost.  */
  3,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  2,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (2),           /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (4),          /* DI */
   COSTS_N_INSNS (5)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),         /* HI */
   COSTS_N_INSNS (51),         /* SI */
   COSTS_N_INSNS (83),         /* DI */
   COSTS_N_INSNS (83)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  9,                           /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {3, 4, 3},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {3, 4, 3},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {4, 4, 12},                  /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {6, 6, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {3, 3},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {4, 4},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {4, 4, 3},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 4, 5},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  3,                           /* MMX or SSE register to integer */
                               /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                  On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  64,                          /* size of l1 cache.  */
  512,                         /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                         /* number of parallel prefetches */
  2,                           /* Branch cost */
  COSTS_N_INSNS (4),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),          /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                           /* scalar_stmt_cost.  */
  2,                           /* scalar load_cost.  */
  2,                           /* scalar_store_cost.  */
  6,                           /* vec_stmt_cost.  */
  0,                           /* vec_to_scalar_cost.  */
  2,                           /* scalar_to_vec_cost.  */
  2,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  2,                           /* vec_store_cost.  */
  2,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (2),           /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (4),          /* DI */
   COSTS_N_INSNS (5)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),         /* HI */
   COSTS_N_INSNS (51),         /* SI */
   COSTS_N_INSNS (83),         /* DI */
   COSTS_N_INSNS (83)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  9,                           /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {3, 4, 3},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {3, 4, 3},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {4, 4, 12},                  /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {6, 6, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {3, 3},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {4, 4},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {4, 4, 3},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 4, 5},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  3,                           /* MMX or SSE register to integer */
                               /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                  On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  64,                          /* size of l1 cache.  */
  1024,                        /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                         /* number of parallel prefetches */
  2,                           /* Branch cost */
  COSTS_N_INSNS (4),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),          /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                           /* scalar_stmt_cost.  */
  2,                           /* scalar load_cost.  */
  2,                           /* scalar_store_cost.  */
  6,                           /* vec_stmt_cost.  */
  0,                           /* vec_to_scalar_cost.  */
  2,                           /* scalar_to_vec_cost.  */
  2,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  2,                           /* vec_store_cost.  */
  2,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (3),           /* cost of a lea instruction */
  COSTS_N_INSNS (4),           /* variable shift costs */
  COSTS_N_INSNS (4),           /* constant shift costs */
  {COSTS_N_INSNS (15),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),         /* HI */
   COSTS_N_INSNS (15),         /* SI */
   COSTS_N_INSNS (15),         /* DI */
   COSTS_N_INSNS (15)},        /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),         /* HI */
   COSTS_N_INSNS (56),         /* SI */
   COSTS_N_INSNS (56),         /* DI */
   COSTS_N_INSNS (56)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  16,                          /* "large" insn */
  6,                           /* MOVE_RATIO */
  2,                           /* cost for loading QImode using movzbl */
  {4, 5, 4},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {2, 3, 2},                   /* cost of storing integer registers */
  2,                           /* cost of reg,reg fld/fst */
  {2, 2, 6},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {4, 4, 6},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {2, 2},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {2, 2},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  12,                          /* cost of moving SSE register */
  {12, 12, 12},                /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {2, 2, 8},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  10,                          /* MMX or SSE register to integer */
  8,                           /* size of l1 cache.  */
  256,                         /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  6,                           /* number of parallel prefetches */
  2,                           /* Branch cost */
  COSTS_N_INSNS (5),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),          /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1),           /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (10),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),         /* HI */
   COSTS_N_INSNS (10),         /* SI */
   COSTS_N_INSNS (10),         /* DI */
   COSTS_N_INSNS (10)},        /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),         /* HI */
   COSTS_N_INSNS (66),         /* SI */
   COSTS_N_INSNS (66),         /* DI */
   COSTS_N_INSNS (66)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  16,                          /* "large" insn */
  17,                          /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {4, 4, 4},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {4, 4, 4},                   /* cost of storing integer registers */
  3,                           /* cost of reg,reg fld/fst */
  {12, 12, 12},                /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {4, 4, 4},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  6,                           /* cost of moving MMX register */
  {12, 12},                    /* cost of loading MMX registers
                                  in SImode and DImode */
  {12, 12},                    /* cost of storing MMX registers
                                  in SImode and DImode */
  6,                           /* cost of moving SSE register */
  {12, 12, 12},                /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {12, 12, 12},                /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  8,                           /* MMX or SSE register to integer */
  8,                           /* size of l1 cache.  */
  1024,                        /* size of l2 cache.  */
  128,                         /* size of prefetch block */
  8,                           /* number of parallel prefetches */
  1,                           /* Branch cost */
  COSTS_N_INSNS (6),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),          /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,       /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (3),          /* DI */
   COSTS_N_INSNS (3)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),         /* HI */
   COSTS_N_INSNS (22),         /* SI */
   COSTS_N_INSNS (22),         /* DI */
   COSTS_N_INSNS (22)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  16,                          /* MOVE_RATIO */
  2,                           /* cost for loading QImode using movzbl */
  {6, 6, 6},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {4, 4, 4},                   /* cost of storing integer registers */
  2,                           /* cost of reg,reg fld/fst */
  {6, 6, 6},                   /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {4, 4, 4},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {6, 6},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {4, 4},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {6, 6, 6},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {4, 4, 4},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  2,                           /* MMX or SSE register to integer */
  32,                          /* size of l1 cache.  */
  2048,                        /* size of l2 cache.  */
  128,                         /* size of prefetch block */
  8,                           /* number of parallel prefetches */
  3,                           /* Branch cost */
  COSTS_N_INSNS (3),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),          /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,       /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (4),          /* DI */
   COSTS_N_INSNS (2)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),         /* HI */
   COSTS_N_INSNS (42),         /* SI */
   COSTS_N_INSNS (74),         /* DI */
   COSTS_N_INSNS (74)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  17,                          /* MOVE_RATIO */
  2,                           /* cost for loading QImode using movzbl */
  {4, 4, 4},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {4, 4, 4},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {12, 12, 12},                /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {6, 6, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {8, 8},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {8, 8},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {8, 8, 8},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {8, 8, 8},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  5,                           /* MMX or SSE register to integer */
  32,                          /* size of l1 cache.  */
  256,                         /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  6,                           /* number of parallel prefetches */
  3,                           /* Branch cost */
  COSTS_N_INSNS (8),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),          /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,       /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (4),          /* DI */
   COSTS_N_INSNS (2)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),         /* HI */
   COSTS_N_INSNS (42),         /* SI */
   COSTS_N_INSNS (74),         /* DI */
   COSTS_N_INSNS (74)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  17,                          /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {4, 4, 4},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {4, 4, 4},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {12, 12, 12},                /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {6, 6, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {8, 8},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {8, 8},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {8, 8, 8},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {8, 8, 8},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  5,                           /* MMX or SSE register to integer */
  32,                          /* size of l1 cache.  */
  512,                         /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  6,                           /* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,                           /* Branch cost */
  COSTS_N_INSNS (8),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),          /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),           /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,       /* cost of a lea instruction */
  COSTS_N_INSNS (1),           /* variable shift costs */
  COSTS_N_INSNS (1),           /* constant shift costs */
  {COSTS_N_INSNS (3),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),          /* HI */
   COSTS_N_INSNS (3),          /* SI */
   COSTS_N_INSNS (4),          /* DI */
   COSTS_N_INSNS (2)},         /* other */
  0,                           /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),         /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),         /* HI */
   COSTS_N_INSNS (42),         /* SI */
   COSTS_N_INSNS (74),         /* DI */
   COSTS_N_INSNS (74)},        /* other */
  COSTS_N_INSNS (1),           /* cost of movsx */
  COSTS_N_INSNS (1),           /* cost of movzx */
  8,                           /* "large" insn */
  17,                          /* MOVE_RATIO */
  4,                           /* cost for loading QImode using movzbl */
  {4, 4, 4},                   /* cost of loading integer registers
                                  in QImode, HImode and SImode.
                                  Relative to reg-reg move (2).  */
  {4, 4, 4},                   /* cost of storing integer registers */
  4,                           /* cost of reg,reg fld/fst */
  {12, 12, 12},                /* cost of loading fp registers
                                  in SFmode, DFmode and XFmode */
  {6, 6, 8},                   /* cost of storing fp registers
                                  in SFmode, DFmode and XFmode */
  2,                           /* cost of moving MMX register */
  {8, 8},                      /* cost of loading MMX registers
                                  in SImode and DImode */
  {8, 8},                      /* cost of storing MMX registers
                                  in SImode and DImode */
  2,                           /* cost of moving SSE register */
  {8, 8, 8},                   /* cost of loading SSE registers
                                  in SImode, DImode and TImode */
  {8, 8, 8},                   /* cost of storing SSE registers
                                  in SImode, DImode and TImode */
  5,                           /* MMX or SSE register to integer */
  32,                          /* size of l1 cache.  */
  256,                         /* size of l2 cache.  */
  64,                          /* size of prefetch block */
  6,                           /* number of parallel prefetches */
  3,                           /* Branch cost */
  COSTS_N_INSNS (8),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),          /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),          /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                           /* scalar_stmt_cost.  */
  1,                           /* scalar load_cost.  */
  1,                           /* scalar_store_cost.  */
  1,                           /* vec_stmt_cost.  */
  1,                           /* vec_to_scalar_cost.  */
  1,                           /* scalar_to_vec_cost.  */
  1,                           /* vec_align_load_cost.  */
  2,                           /* vec_unalign_load_cost.  */
  1,                           /* vec_store_cost.  */
  3,                           /* cond_taken_branch_cost.  */
  1,                           /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
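/* An illustrative note (not part of the original excerpt): the table
   behind ix86_cost is what the rtx-cost machinery consults, roughly

     const struct processor_costs *cost
       = speed ? ix86_cost : &ix86_size_cost;
     ...
     *total = cost->add;   /+ e.g. for a simple PLUS +/

   so retuning an entry above changes what the middle end considers
   cheap for that CPU.  (The exact hook code is outside this excerpt.)  */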
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
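/* A sketch of how these bits are consumed (assumed from context, not part
   of this excerpt): at option-override time the mask of the selected
   processor is tested against each table entry, roughly

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   after which the TARGET_* tuning macros simply read ix86_tune_features.  */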
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4-based chips that treat 128-bit
1509 SSE registers as single units and K8-based chips that divide SSE
1510 registers into two 64-bit halves. This knob promotes all store destinations
1511 to be 128-bit to allow register renaming on 128-bit SSE units, but usually
1512 results in one extra microop on 64-bit SSE units. Experimental results
1513 show that disabling this option on P4 brings over a 20% SPECfp regression,
1514 while enabling it on K8 brings a roughly 2.4% regression that can be partly
1515 masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format,
1531 leaving the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in a 16-byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1593 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1597 vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1601 machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1605 than a MOV. */
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1613 operand that cannot be represented using a modRM byte. The XOR
1614 replacement is long decoded, so this split helps here as well. */
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618 from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635 instructions. */
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* In case the average insn count for a single function invocation is
1672 lower than this constant, emit a fast (but longer) prologue and
1673 epilogue. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
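/* A sketch of how this threshold is applied (the real test lives in the
   frame-layout code later in this file; the exact condition shown here is
   illustrative):

     cfun->machine->use_fast_prologue_epilogue
       = count < FAST_PROLOGUE_INSN_COUNT;  */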
1676 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative
1781 numbers:
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1835 saved static chain if ix86_static_chain_on_stack
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1844 <- sse_regs_save_offset
1847 [va_arg registers] |
1851 [padding2] | = to_allocate
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which cpu we are scheduling for. */
1886 enum attr_cpu ix86_schedule;
1888 /* Which cpu we are optimizing for. */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* True if the SSE prefetch instruction is not a NOP. */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 extern int ix86_force_align_arg_pointer;
1902 static const char ix86_force_align_arg_pointer_string[]
1903 = "force_align_arg_pointer";
1905 static rtx (*ix86_gen_leave) (void);
1906 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1908 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1909 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1910 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1912 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1913 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1914 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
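/* These hooks let the rest of the backend emit word-size-agnostic RTL:
   each is bound once, to an SImode or DImode generator from i386.md,
   depending on TARGET_64BIT.  A minimal sketch of the binding done in
   ix86_option_override_internal:

     if (TARGET_64BIT)
       ix86_gen_add3 = gen_adddi3;
     else
       ix86_gen_add3 = gen_addsi3;  */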
1916 /* Preferred alignment for stack boundary in bits. */
1917 unsigned int ix86_preferred_stack_boundary;
1919 /* Alignment for incoming stack boundary in bits specified at
1920 the command line. */
1921 static unsigned int ix86_user_incoming_stack_boundary;
1923 /* Default alignment for incoming stack boundary in bits. */
1924 static unsigned int ix86_default_incoming_stack_boundary;
1926 /* Alignment for incoming stack boundary in bits. */
1927 unsigned int ix86_incoming_stack_boundary;
1929 /* The ABI used by the target. */
1930 enum calling_abi ix86_abi;
1932 /* Values 1-5: see jump.c */
1933 int ix86_branch_cost;
1935 /* Calling abi specific va_list type nodes. */
1936 static GTY(()) tree sysv_va_list_type_node;
1937 static GTY(()) tree ms_va_list_type_node;
1939 /* Variables which are this size or smaller are put in the data/bss
1940 or ldata/lbss sections. */
1942 int ix86_section_threshold = 65536;
1944 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1945 char internal_label_prefix[16];
1946 int internal_label_prefix_len;
1948 /* Fence to use after loop using movnt. */
1951 /* Register class used for passing a given 64-bit part of the argument.
1952 These represent classes as documented by the PS ABI, with the exception
1953 of the SSESF and SSEDF classes, which are basically the SSE class: gcc
1954 just uses SFmode or DFmode moves instead of DImode to avoid reformatting
1956 penalties. Similarly we play games with INTEGERSI_CLASS to use cheaper
1957 SImode moves whenever possible (the upper half does contain padding). */
1958 enum x86_64_reg_class
1961 X86_64_INTEGER_CLASS,
1962 X86_64_INTEGERSI_CLASS,
1969 X86_64_COMPLEX_X87_CLASS,
1973 #define MAX_CLASSES 4
1975 /* Table of constants used by fldpi, fldln2, etc.... */
1976 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1977 static bool ext_80387_constants_init = 0;
1980 static struct machine_function * ix86_init_machine_status (void);
1981 static rtx ix86_function_value (const_tree, const_tree, bool);
1982 static bool ix86_function_value_regno_p (const unsigned int);
1983 static rtx ix86_static_chain (const_tree, bool);
1984 static int ix86_function_regparm (const_tree, const_tree);
1985 static void ix86_compute_frame_layout (struct ix86_frame *);
1986 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1988 static void ix86_add_new_builtins (int);
1989 static rtx ix86_expand_vec_perm_builtin (tree);
1990 static tree ix86_canonical_va_list_type (tree);
1991 static void predict_jump (int);
1993 enum ix86_function_specific_strings
1995 IX86_FUNCTION_SPECIFIC_ARCH,
1996 IX86_FUNCTION_SPECIFIC_TUNE,
1997 IX86_FUNCTION_SPECIFIC_FPMATH,
1998 IX86_FUNCTION_SPECIFIC_MAX
2001 static char *ix86_target_string (int, int, const char *, const char *,
2002 const char *, bool);
2003 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2004 static void ix86_function_specific_save (struct cl_target_option *);
2005 static void ix86_function_specific_restore (struct cl_target_option *);
2006 static void ix86_function_specific_print (FILE *, int,
2007 struct cl_target_option *);
2008 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2009 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2010 static bool ix86_can_inline_p (tree, tree);
2011 static void ix86_set_current_function (tree);
2012 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2014 static enum calling_abi ix86_function_abi (const_tree);
2017 #ifndef SUBTARGET32_DEFAULT_CPU
2018 #define SUBTARGET32_DEFAULT_CPU "i386"
2021 /* The svr4 ABI for the i386 says that records and unions are returned
2022 in memory. */
2023 #ifndef DEFAULT_PCC_STRUCT_RETURN
2024 #define DEFAULT_PCC_STRUCT_RETURN 1
2027 /* Whether -mtune= or -march= were specified */
2028 static int ix86_tune_defaulted;
2029 static int ix86_arch_specified;
2031 /* A mask of ix86_isa_flags that includes bit X if X
2032 was set or cleared on the command line. */
2033 static int ix86_isa_flags_explicit;
2035 /* Define a set of ISAs which are available when a given ISA is
2036 enabled. MMX and SSE ISAs are handled separately. */
2038 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2039 #define OPTION_MASK_ISA_3DNOW_SET \
2040 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2042 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2043 #define OPTION_MASK_ISA_SSE2_SET \
2044 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2045 #define OPTION_MASK_ISA_SSE3_SET \
2046 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2047 #define OPTION_MASK_ISA_SSSE3_SET \
2048 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2049 #define OPTION_MASK_ISA_SSE4_1_SET \
2050 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2051 #define OPTION_MASK_ISA_SSE4_2_SET \
2052 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2053 #define OPTION_MASK_ISA_AVX_SET \
2054 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2055 #define OPTION_MASK_ISA_FMA_SET \
2056 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
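/* Because each *_SET macro includes its prerequisite's *_SET, enabling a
   late ISA pulls in everything it builds on.  A minimal sketch of the
   effect of -mavx, assuming no other ISA options were given explicitly:

     ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;

   after which SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 are set too.  */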
2058 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2059 as -msse4.2. */
2060 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2062 #define OPTION_MASK_ISA_SSE4A_SET \
2063 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2064 #define OPTION_MASK_ISA_FMA4_SET \
2065 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2066 | OPTION_MASK_ISA_AVX_SET)
2067 #define OPTION_MASK_ISA_XOP_SET \
2068 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2069 #define OPTION_MASK_ISA_LWP_SET \
2072 /* AES and PCLMUL need SSE2 because they use xmm registers */
2073 #define OPTION_MASK_ISA_AES_SET \
2074 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2075 #define OPTION_MASK_ISA_PCLMUL_SET \
2076 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2078 #define OPTION_MASK_ISA_ABM_SET \
2079 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2081 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2082 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2083 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2084 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2085 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2087 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2088 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2089 #define OPTION_MASK_ISA_F16C_SET \
2090 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2092 /* Define a set of ISAs which aren't available when a given ISA is
2093 disabled. MMX and SSE ISAs are handled separately. */
2095 #define OPTION_MASK_ISA_MMX_UNSET \
2096 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2097 #define OPTION_MASK_ISA_3DNOW_UNSET \
2098 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2099 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2101 #define OPTION_MASK_ISA_SSE_UNSET \
2102 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2103 #define OPTION_MASK_ISA_SSE2_UNSET \
2104 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2105 #define OPTION_MASK_ISA_SSE3_UNSET \
2106 (OPTION_MASK_ISA_SSE3 \
2107 | OPTION_MASK_ISA_SSSE3_UNSET \
2108 | OPTION_MASK_ISA_SSE4A_UNSET )
2109 #define OPTION_MASK_ISA_SSSE3_UNSET \
2110 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2111 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2112 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2113 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2114 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2115 #define OPTION_MASK_ISA_AVX_UNSET \
2116 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2117 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2118 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2120 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2121 as -mno-sse4.1. */
2122 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2124 #define OPTION_MASK_ISA_SSE4A_UNSET \
2125 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2127 #define OPTION_MASK_ISA_FMA4_UNSET \
2128 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2129 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2130 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2132 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2133 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2134 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2135 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2136 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2137 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2138 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2139 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2141 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2142 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2143 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
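/* The UNSET masks run the dependency chain in the other direction:
   clearing a base ISA also clears everything layered on top of it.
   Sketch, mirroring the handling in ix86_handle_option:

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;

   which clears SSE2 and, transitively, SSE3, SSSE3, SSE4.1, SSE4.2,
   SSE4A, FMA4, XOP, AVX, FMA and F16C.  */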
2145 /* Vectorization library interface and handlers. */
2146 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2148 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2151 /* Processor target table, indexed by processor number */
2154 const struct processor_costs *cost; /* Processor costs */
2155 const int align_loop; /* Default alignments. */
2156 const int align_loop_max_skip;
2157 const int align_jump;
2158 const int align_jump_max_skip;
2159 const int align_func;
2162 static const struct ptt processor_target_table[PROCESSOR_max] =
2164 {&i386_cost, 4, 3, 4, 3, 4},
2165 {&i486_cost, 16, 15, 16, 15, 16},
2166 {&pentium_cost, 16, 7, 16, 7, 16},
2167 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2168 {&geode_cost, 0, 0, 0, 0, 0},
2169 {&k6_cost, 32, 7, 32, 7, 32},
2170 {&athlon_cost, 16, 7, 16, 7, 16},
2171 {&pentium4_cost, 0, 0, 0, 0, 0},
2172 {&k8_cost, 16, 7, 16, 7, 16},
2173 {&nocona_cost, 0, 0, 0, 0, 0},
2174 {&core2_cost, 16, 10, 16, 10, 16},
2175 {&generic32_cost, 16, 7, 16, 7, 16},
2176 {&generic64_cost, 16, 10, 16, 10, 16},
2177 {&amdfam10_cost, 32, 24, 32, 7, 32},
2178 {&bdver1_cost, 32, 24, 32, 7, 32},
2179 {&atom_cost, 16, 7, 16, 7, 16}
2182 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2209 /* Return true if a red-zone is in use. */
2212 ix86_using_red_zone (void)
2214 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
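/* Background: the SysV x86-64 ABI reserves a 128-byte "red zone" below
   %rsp that signal and interrupt handlers must not clobber, so leaf code
   can spill there without adjusting %rsp.  The MS ABI reserves no such
   area, hence the !TARGET_64BIT_MS_ABI test above.  */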
2217 /* Implement TARGET_HANDLE_OPTION. */
2220 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2227 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2232 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2240 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2241 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2245 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2256 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2261 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2262 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2269 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2274 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2275 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2282 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2287 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2288 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2295 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2300 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2301 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2308 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2313 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2314 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2321 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2326 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2327 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2334 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2339 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2340 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2347 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2348 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2352 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2353 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2358 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2359 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2363 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2364 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2370 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2371 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2375 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2376 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2383 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2384 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2388 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2389 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2396 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2397 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2401 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2402 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2409 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2410 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2414 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2415 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2422 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2423 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2427 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2428 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2435 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2436 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2440 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2441 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2448 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2449 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2453 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2454 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2461 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2462 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2466 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2467 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2474 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2475 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2479 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2480 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2487 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2488 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2492 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2493 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2500 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2501 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2505 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2506 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2513 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2514 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2518 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2519 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2526 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2527 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2531 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2532 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2539 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2540 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2544 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2545 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2552 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2553 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2557 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2558 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
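/* Every case above follows the same pattern: adjust ix86_isa_flags, and
   record in ix86_isa_flags_explicit that the user made a decision so the
   -march= processing below does not silently override it.  For example,
   "-mno-avx -march=bdver1" leaves AVX off, because the implicit PTA_AVX
   enable is gated on !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX).  */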
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
2571 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2572 const char *fpmath, bool add_nl_p)
2574 struct ix86_target_opts
2576 const char *option; /* option string */
2577 int mask; /* isa mask options */
2580 /* This table is ordered so that options like -msse4.2 that imply
2581 preceding options are matched first. */
2582 static struct ix86_target_opts isa_opts[] =
2584 { "-m64", OPTION_MASK_ISA_64BIT },
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2590 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2591 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2592 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2593 { "-msse3", OPTION_MASK_ISA_SSE3 },
2594 { "-msse2", OPTION_MASK_ISA_SSE2 },
2595 { "-msse", OPTION_MASK_ISA_SSE },
2596 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2597 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2598 { "-mmmx", OPTION_MASK_ISA_MMX },
2599 { "-mabm", OPTION_MASK_ISA_ABM },
2600 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2601 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2602 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2603 { "-maes", OPTION_MASK_ISA_AES },
2604 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2605 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2606 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2607 { "-mf16c", OPTION_MASK_ISA_F16C },
2611 static struct ix86_target_opts flag_opts[] =
2613 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2614 { "-m80387", MASK_80387 },
2615 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2616 { "-malign-double", MASK_ALIGN_DOUBLE },
2617 { "-mcld", MASK_CLD },
2618 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2619 { "-mieee-fp", MASK_IEEE_FP },
2620 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2621 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2622 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2623 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2624 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2625 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2626 { "-mno-red-zone", MASK_NO_RED_ZONE },
2627 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2628 { "-mrecip", MASK_RECIP },
2629 { "-mrtd", MASK_RTD },
2630 { "-msseregparm", MASK_SSEREGPARM },
2631 { "-mstack-arg-probe", MASK_STACK_PROBE },
2632 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2633 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2636 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2639 char target_other[40];
2648 memset (opts, '\0', sizeof (opts));
2650 /* Add -march= option. */
2653 opts[num][0] = "-march=";
2654 opts[num++][1] = arch;
2657 /* Add -mtune= option. */
2660 opts[num][0] = "-mtune=";
2661 opts[num++][1] = tune;
2664 /* Pick out the ISA options. */
2665 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2667 if ((isa & isa_opts[i].mask) != 0)
2669 opts[num++][0] = isa_opts[i].option;
2670 isa &= ~ isa_opts[i].mask;
2674 if (isa && add_nl_p)
2676 opts[num++][0] = isa_other;
2677 sprintf (isa_other, "(other isa: %#x)", isa);
2680 /* Add flag options. */
2681 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2683 if ((flags & flag_opts[i].mask) != 0)
2685 opts[num++][0] = flag_opts[i].option;
2686 flags &= ~ flag_opts[i].mask;
2690 if (flags && add_nl_p)
2692 opts[num++][0] = target_other;
2693 sprintf (target_other, "(other flags: %#x)", flags);
2696 /* Add -fpmath= option. */
2699 opts[num][0] = "-mfpmath=";
2700 opts[num++][1] = fpmath;
2707 gcc_assert (num < ARRAY_SIZE (opts));
2709 /* Size the string. */
2711 sep_len = (add_nl_p) ? 3 : 1;
2712 for (i = 0; i < num; i++)
2715 for (j = 0; j < 2; j++)
2717 len += strlen (opts[i][j]);
2720 /* Build the string. */
2721 ret = ptr = (char *) xmalloc (len);
2724 for (i = 0; i < num; i++)
2728 for (j = 0; j < 2; j++)
2729 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2736 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2744 for (j = 0; j < 2; j++)
2747 memcpy (ptr, opts[i][j], len2[j]);
2749 line_len += len2[j];
2754 gcc_assert (ret + len >= ptr);
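/* Example of the kind of string this produces, for a hypothetical
   "-march=k8 -mtune=generic" 64-bit invocation (illustrative only):

     "-m64 -mmmx -m3dnow -m3dnowa -msse -msse2 -march=k8 -mtune=generic"

   ix86_debug_options below prints this form to stderr.  */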
2759 /* Return TRUE if software prefetching is beneficial for the
2760 given CPU. */
2763 software_prefetching_beneficial_p (void)
2767 case PROCESSOR_GEODE:
2769 case PROCESSOR_ATHLON:
2771 case PROCESSOR_AMDFAM10:
2779 /* Return true if profiling code should be emitted before the prologue,
2780 and false otherwise.
2781 Note: for x86 the "hotfix" variant is rejected with sorry (). */
2783 ix86_profile_before_prologue (void)
2785 return flag_fentry != 0;
2788 /* Function that is callable from the debugger to print the current
2791 ix86_debug_options (void)
2793 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2794 ix86_arch_string, ix86_tune_string,
2795 ix86_fpmath_string, true);
2799 fprintf (stderr, "%s\n\n", opts);
2803 fputs ("<no options>\n\n", stderr);
2808 /* Override various settings based on options. If MAIN_ARGS_P, the
2809 options are from the command line, otherwise they are from
2813 ix86_option_override_internal (bool main_args_p)
2816 unsigned int ix86_arch_mask, ix86_tune_mask;
2817 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2822 /* Comes from final.c -- no real reason to change it. */
2823 #define MAX_CODE_ALIGN 16
2831 PTA_PREFETCH_SSE = 1 << 4,
2833 PTA_3DNOW_A = 1 << 6,
2837 PTA_POPCNT = 1 << 10,
2839 PTA_SSE4A = 1 << 12,
2840 PTA_NO_SAHF = 1 << 13,
2841 PTA_SSE4_1 = 1 << 14,
2842 PTA_SSE4_2 = 1 << 15,
2844 PTA_PCLMUL = 1 << 17,
2847 PTA_MOVBE = 1 << 20,
2851 PTA_FSGSBASE = 1 << 24,
2852 PTA_RDRND = 1 << 25,
2858 const char *const name; /* processor name or nickname. */
2859 const enum processor_type processor;
2860 const enum attr_cpu schedule;
2861 const unsigned /*enum pta_flags*/ flags;
2863 const processor_alias_table[] =
2865 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2866 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2867 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2868 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2869 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2870 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2871 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2872 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2873 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2874 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2875 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2876 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2877 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2879 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2882 PTA_MMX | PTA_SSE | PTA_SSE2},
2883 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2889 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2891 | PTA_CX16 | PTA_NO_SAHF},
2892 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2893 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2894 | PTA_SSSE3 | PTA_CX16},
2895 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2896 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2897 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2898 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2900 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2901 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2902 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2903 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2904 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2905 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2909 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"x86-64", PROCESSOR_K8, CPU_K8,
2914 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2915 {"k8", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_NO_SAHF},
2918 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2921 {"opteron", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_NO_SAHF},
2924 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2927 {"athlon64", PROCESSOR_K8, CPU_K8,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_NO_SAHF},
2930 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2933 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2934 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2935 | PTA_SSE2 | PTA_NO_SAHF},
2936 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2937 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2938 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2939 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2940 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2941 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2942 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2943 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2944 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2945 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2946 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2947 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2948 0 /* flags are only used for -march switch. */ },
2949 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2950 PTA_64BIT /* flags are only used for -march switch. */ },
2953 int const pta_size = ARRAY_SIZE (processor_alias_table);
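/* Worked example of how an alias row is consumed.  Given -march=k8-sse3,
   the matching row supplies PROCESSOR_K8/CPU_K8 plus PTA_64BIT | PTA_MMX
   | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF;
   the loop over pta_size below then translates each PTA_* bit into the
   matching OPTION_MASK_ISA_* bit, unless the user already set that ISA
   explicitly.  */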
2955 /* Set up prefix/suffix so the error messages refer to either the command
2956 line argument, or the attribute(target). */
2965 prefix = "option(\"";
2970 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2971 SUBTARGET_OVERRIDE_OPTIONS;
2974 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2975 SUBSUBTARGET_OVERRIDE_OPTIONS;
2978 /* -fPIC is the default for x86_64. */
2979 if (TARGET_MACHO && TARGET_64BIT)
2982 /* Need to check -mtune=generic first. */
2983 if (ix86_tune_string)
2985 if (!strcmp (ix86_tune_string, "generic")
2986 || !strcmp (ix86_tune_string, "i686")
2987 /* As special support for cross compilers we read -mtune=native
2988 as -mtune=generic. With native compilers we won't see -mtune=native,
2989 as it is rewritten by the driver. */
2990 || !strcmp (ix86_tune_string, "native"))
2993 ix86_tune_string = "generic64";
2995 ix86_tune_string = "generic32";
2997 /* If this call is for setting the option attribute, allow the
2998 generic32/generic64 that was previously set. */
2999 else if (!main_args_p
3000 && (!strcmp (ix86_tune_string, "generic32")
3001 || !strcmp (ix86_tune_string, "generic64")))
3003 else if (!strncmp (ix86_tune_string, "generic", 7))
3004 error ("bad value (%s) for %stune=%s %s",
3005 ix86_tune_string, prefix, suffix, sw);
3006 else if (!strcmp (ix86_tune_string, "x86-64"))
3007 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3008 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3009 prefix, suffix, prefix, suffix, prefix, suffix);
3013 if (ix86_arch_string)
3014 ix86_tune_string = ix86_arch_string;
3015 if (!ix86_tune_string)
3017 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3018 ix86_tune_defaulted = 1;
3021 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3022 need to use a sensible tune option. */
3023 if (!strcmp (ix86_tune_string, "generic")
3024 || !strcmp (ix86_tune_string, "x86-64")
3025 || !strcmp (ix86_tune_string, "i686"))
3028 ix86_tune_string = "generic64";
3030 ix86_tune_string = "generic32";
3034 if (ix86_stringop_string)
3036 if (!strcmp (ix86_stringop_string, "rep_byte"))
3037 stringop_alg = rep_prefix_1_byte;
3038 else if (!strcmp (ix86_stringop_string, "libcall"))
3039 stringop_alg = libcall;
3040 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3041 stringop_alg = rep_prefix_4_byte;
3042 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3044 /* rep; movq isn't available in 32-bit code. */
3045 stringop_alg = rep_prefix_8_byte;
3046 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3047 stringop_alg = loop_1_byte;
3048 else if (!strcmp (ix86_stringop_string, "loop"))
3049 stringop_alg = loop;
3050 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3051 stringop_alg = unrolled_loop;
3053 error ("bad value (%s) for %sstringop-strategy=%s %s",
3054 ix86_stringop_string, prefix, suffix, sw);
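/* e.g. -mstringop-strategy=rep_8byte selects rep_prefix_8_byte, but only
   in 64-bit mode; in 32-bit code "rep; movq" does not exist, so the
   TARGET_64BIT check above lets the request fall through to the error.  */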
3057 if (!ix86_arch_string)
3058 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3060 ix86_arch_specified = 1;
3062 /* Validate -mabi= value. */
3063 if (ix86_abi_string)
3065 if (strcmp (ix86_abi_string, "sysv") == 0)
3066 ix86_abi = SYSV_ABI;
3067 else if (strcmp (ix86_abi_string, "ms") == 0)
3070 error ("unknown ABI (%s) for %sabi=%s %s",
3071 ix86_abi_string, prefix, suffix, sw);
3074 ix86_abi = DEFAULT_ABI;
3076 if (ix86_cmodel_string != 0)
3078 if (!strcmp (ix86_cmodel_string, "small"))
3079 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3080 else if (!strcmp (ix86_cmodel_string, "medium"))
3081 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3082 else if (!strcmp (ix86_cmodel_string, "large"))
3083 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3085 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3086 else if (!strcmp (ix86_cmodel_string, "32"))
3087 ix86_cmodel = CM_32;
3088 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3089 ix86_cmodel = CM_KERNEL;
3091 error ("bad value (%s) for %scmodel=%s %s",
3092 ix86_cmodel_string, prefix, suffix, sw);
3096 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3097 use of rip-relative addressing. This eliminates fixups that
3098 would otherwise be needed if this object is to be placed in a
3099 DLL, and is essentially just as efficient as direct addressing. */
3100 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3101 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3102 else if (TARGET_64BIT)
3103 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3105 ix86_cmodel = CM_32;
3107 if (ix86_asm_string != 0)
3110 && !strcmp (ix86_asm_string, "intel"))
3111 ix86_asm_dialect = ASM_INTEL;
3112 else if (!strcmp (ix86_asm_string, "att"))
3113 ix86_asm_dialect = ASM_ATT;
3115 error ("bad value (%s) for %sasm=%s %s",
3116 ix86_asm_string, prefix, suffix, sw);
3118 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3119 error ("code model %qs not supported in the %s bit mode",
3120 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3121 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3122 sorry ("%i-bit mode not compiled in",
3123 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3125 for (i = 0; i < pta_size; i++)
3126 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3128 ix86_schedule = processor_alias_table[i].schedule;
3129 ix86_arch = processor_alias_table[i].processor;
3130 /* Default cpu tuning to the architecture. */
3131 ix86_tune = ix86_arch;
3133 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3134 error ("CPU you selected does not support x86-64 "
3137 if (processor_alias_table[i].flags & PTA_MMX
3138 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3139 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3140 if (processor_alias_table[i].flags & PTA_3DNOW
3141 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3142 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3143 if (processor_alias_table[i].flags & PTA_3DNOW_A
3144 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3145 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3146 if (processor_alias_table[i].flags & PTA_SSE
3147 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3148 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3149 if (processor_alias_table[i].flags & PTA_SSE2
3150 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3151 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3152 if (processor_alias_table[i].flags & PTA_SSE3
3153 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3154 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3155 if (processor_alias_table[i].flags & PTA_SSSE3
3156 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3158 if (processor_alias_table[i].flags & PTA_SSE4_1
3159 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3160 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3161 if (processor_alias_table[i].flags & PTA_SSE4_2
3162 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3163 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3164 if (processor_alias_table[i].flags & PTA_AVX
3165 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3166 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3167 if (processor_alias_table[i].flags & PTA_FMA
3168 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3169 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3170 if (processor_alias_table[i].flags & PTA_SSE4A
3171 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3172 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3173 if (processor_alias_table[i].flags & PTA_FMA4
3174 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3175 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3176 if (processor_alias_table[i].flags & PTA_XOP
3177 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3178 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3179 if (processor_alias_table[i].flags & PTA_LWP
3180 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3181 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3182 if (processor_alias_table[i].flags & PTA_ABM
3183 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3184 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3185 if (processor_alias_table[i].flags & PTA_CX16
3186 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3187 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3188 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3189 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3190 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3191 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3192 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3193 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3194 if (processor_alias_table[i].flags & PTA_MOVBE
3195 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3196 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3197 if (processor_alias_table[i].flags & PTA_AES
3198 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3199 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3200 if (processor_alias_table[i].flags & PTA_PCLMUL
3201 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3202 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3203 if (processor_alias_table[i].flags & PTA_FSGSBASE
3204 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3205 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3206 if (processor_alias_table[i].flags & PTA_RDRND
3207 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3208 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3209 if (processor_alias_table[i].flags & PTA_F16C
3210 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3211 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3212 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3213 x86_prefetch_sse = true;
3218 if (!strcmp (ix86_arch_string, "generic"))
3219 error ("generic CPU can be used only for %stune=%s %s",
3220 prefix, suffix, sw);
3221 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3222 error ("bad value (%s) for %sarch=%s %s",
3223 ix86_arch_string, prefix, suffix, sw);
3225 ix86_arch_mask = 1u << ix86_arch;
3226 for (i = 0; i < X86_ARCH_LAST; ++i)
3227 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
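/* For instance, with -march=i486 the mask is 1 << PROCESSOR_I486, so
   X86_ARCH_CMOVE stays 0 (i486 is in the complemented set above) while
   X86_ARCH_CMPXCHG becomes 1; written out for one feature:

     ix86_arch_features[X86_ARCH_CMOVE]
       = !!(~(m_386 | m_486 | m_PENT | m_K6) & (1u << PROCESSOR_I486));  */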
3229 for (i = 0; i < pta_size; i++)
3230 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3232 ix86_schedule = processor_alias_table[i].schedule;
3233 ix86_tune = processor_alias_table[i].processor;
3234 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3236 if (ix86_tune_defaulted)
3238 ix86_tune_string = "x86-64";
3239 for (i = 0; i < pta_size; i++)
3240 if (! strcmp (ix86_tune_string,
3241 processor_alias_table[i].name))
3243 ix86_schedule = processor_alias_table[i].schedule;
3244 ix86_tune = processor_alias_table[i].processor;
3247 error ("CPU you selected does not support x86-64 "
3250 /* Intel CPUs have always interpreted SSE prefetch instructions as
3251 NOPs; so, we can enable SSE prefetch instructions even when
3252 -mtune (rather than -march) points us to a processor that has them.
3253 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3254 higher processors. */
3256 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3257 x86_prefetch_sse = true;
3261 if (ix86_tune_specified && i == pta_size)
3262 error ("bad value (%s) for %stune=%s %s",
3263 ix86_tune_string, prefix, suffix, sw);
3265 ix86_tune_mask = 1u << ix86_tune;
3266 for (i = 0; i < X86_TUNE_LAST; ++i)
3267 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3269 #ifndef USE_IX86_FRAME_POINTER
3270 #define USE_IX86_FRAME_POINTER 0
3273 /* Set the default values for switches whose default depends on TARGET_64BIT
3274 in case they weren't overwritten by command line options. */
3279 if (flag_omit_frame_pointer == 2)
3280 flag_omit_frame_pointer = 1;
3281 if (flag_asynchronous_unwind_tables == 2)
3282 flag_asynchronous_unwind_tables = 1;
3283 if (flag_pcc_struct_return == 2)
3284 flag_pcc_struct_return = 0;
3290 if (flag_omit_frame_pointer == 2)
3291 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3292 if (flag_asynchronous_unwind_tables == 2)
3293 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3294 if (flag_pcc_struct_return == 2)
3295 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3299 ix86_cost = &ix86_size_cost;
3301 ix86_cost = processor_target_table[ix86_tune].cost;
3303 /* Arrange to set up i386_stack_locals for all functions. */
3304 init_machine_status = ix86_init_machine_status;
3306 /* Validate -mregparm= value. */
3307 if (ix86_regparm_string)
3310 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3311 i = atoi (ix86_regparm_string);
3312 if (i < 0 || i > REGPARM_MAX)
3313 error ("%sregparm=%d%s is not between 0 and %d",
3314 prefix, i, suffix, REGPARM_MAX);
3319 ix86_regparm = REGPARM_MAX;
3321 /* If the user has provided any of the -malign-* options,
3322 warn and use that value only if -falign-* is not set.
3323 Remove this code in GCC 3.2 or later. */
3324 if (ix86_align_loops_string)
3326 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3327 prefix, suffix, suffix);
3328 if (align_loops == 0)
3330 i = atoi (ix86_align_loops_string);
3331 if (i < 0 || i > MAX_CODE_ALIGN)
3332 error ("%salign-loops=%d%s is not between 0 and %d",
3333 prefix, i, suffix, MAX_CODE_ALIGN);
3335 align_loops = 1 << i;
3339 if (ix86_align_jumps_string)
3341 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3342 prefix, suffix, suffix);
3343 if (align_jumps == 0)
3345 i = atoi (ix86_align_jumps_string);
3346 if (i < 0 || i > MAX_CODE_ALIGN)
3347 error ("%salign-loops=%d%s is not between 0 and %d",
3348 prefix, i, suffix, MAX_CODE_ALIGN);
3350 align_jumps = 1 << i;
3354 if (ix86_align_funcs_string)
3356 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3357 prefix, suffix, suffix);
3358 if (align_functions == 0)
3360 i = atoi (ix86_align_funcs_string);
3361 if (i < 0 || i > MAX_CODE_ALIGN)
3362 error ("%salign-loops=%d%s is not between 0 and %d",
3363 prefix, i, suffix, MAX_CODE_ALIGN);
3365 align_functions = 1 << i;
3369 /* Default align_* from the processor table. */
3370 if (align_loops == 0)
3372 align_loops = processor_target_table[ix86_tune].align_loop;
3373 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3375 if (align_jumps == 0)
3377 align_jumps = processor_target_table[ix86_tune].align_jump;
3378 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3380 if (align_functions == 0)
3382 align_functions = processor_target_table[ix86_tune].align_func;
3385 /* Validate -mbranch-cost= value, or provide default. */
3386 ix86_branch_cost = ix86_cost->branch_cost;
3387 if (ix86_branch_cost_string)
3389 i = atoi (ix86_branch_cost_string);
3391 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3393 ix86_branch_cost = i;
3395 if (ix86_section_threshold_string)
3397 i = atoi (ix86_section_threshold_string);
3399 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3401 ix86_section_threshold = i;
3404 if (ix86_tls_dialect_string)
3406 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3407 ix86_tls_dialect = TLS_DIALECT_GNU;
3408 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU2;
3411 error ("bad value (%s) for %stls-dialect=%s %s",
3412 ix86_tls_dialect_string, prefix, suffix, sw);
3415 if (ix87_precision_string)
3417 i = atoi (ix87_precision_string);
3418 if (i != 32 && i != 64 && i != 80)
3419 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3424 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3426 /* Enable by default the SSE and MMX builtins. Do allow the user to
3427 explicitly disable any of these. In particular, disabling SSE and
3428 MMX for kernel code is extremely useful. */
3429 if (!ix86_arch_specified)
3431 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3432 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3435 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3439 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3441 if (!ix86_arch_specified)
3443 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3445 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3446 when the programmer takes care to keep the stack from being destroyed. */
3447 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3448 target_flags |= MASK_NO_RED_ZONE;
3451 /* Keep nonleaf frame pointers. */
3452 if (flag_omit_frame_pointer)
3453 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3454 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3455 flag_omit_frame_pointer = 1;
3457 /* If we're doing fast math, we don't care about comparison order
3458 wrt NaNs. This lets us use a shorter comparison sequence. */
3459 if (flag_finite_math_only)
3460 target_flags &= ~MASK_IEEE_FP;
3462 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3463 since the insns won't need emulation. */
3464 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3465 target_flags &= ~MASK_NO_FANCY_MATH_387;
3467 /* Likewise, if the target doesn't have a 387, or we've specified
3468 software floating point, don't use 387 inline intrinsics. */
3470 target_flags |= MASK_NO_FANCY_MATH_387;
3472 /* Turn on MMX builtins for -msse. */
3475 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3476 x86_prefetch_sse = true;
3479 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3480 if (TARGET_SSE4_2 || TARGET_ABM)
3481 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3483 /* Validate -mpreferred-stack-boundary= value or default it to
3484 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3485 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3486 if (ix86_preferred_stack_boundary_string)
3488 i = atoi (ix86_preferred_stack_boundary_string);
3489 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3490 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3491 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3493 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
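/* Worked example (added for clarity): -mpreferred-stack-boundary=4
   gives (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   stack alignment the x86-64 psABI expects; the 32-bit lower bound of 2
   corresponds to 4-byte alignment.  */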
/* Set the default value for -mstackrealign.  */
if (ix86_force_align_arg_pointer == -1)
  ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

/* Validate -mincoming-stack-boundary= value or default it to
   MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
if (ix86_incoming_stack_boundary_string)
  {
    i = atoi (ix86_incoming_stack_boundary_string);
    if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
      error ("-mincoming-stack-boundary=%d is not between %d and 12",
             i, TARGET_64BIT ? 4 : 2);
    else
      {
        ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
        ix86_incoming_stack_boundary
          = ix86_user_incoming_stack_boundary;
      }
  }

/* Accept -msseregparm only if at least SSE support is enabled.  */
if (TARGET_SSEREGPARM
    && !TARGET_SSE)
  error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

ix86_fpmath = TARGET_FPMATH_DEFAULT;
if (ix86_fpmath_string != 0)
  {
    if (! strcmp (ix86_fpmath_string, "387"))
      ix86_fpmath = FPMATH_387;
    else if (! strcmp (ix86_fpmath_string, "sse"))
      {
        if (!TARGET_SSE)
          {
            warning (0, "SSE instruction set disabled, using 387 arithmetics");
            ix86_fpmath = FPMATH_387;
          }
        else
          ix86_fpmath = FPMATH_SSE;
      }
    else if (! strcmp (ix86_fpmath_string, "387,sse")
             || ! strcmp (ix86_fpmath_string, "387+sse")
             || ! strcmp (ix86_fpmath_string, "sse,387")
             || ! strcmp (ix86_fpmath_string, "sse+387")
             || ! strcmp (ix86_fpmath_string, "both"))
      {
        if (!TARGET_SSE)
          {
            warning (0, "SSE instruction set disabled, using 387 arithmetics");
            ix86_fpmath = FPMATH_387;
          }
        else if (!TARGET_80387)
          {
            warning (0, "387 instruction set disabled, using SSE arithmetics");
            ix86_fpmath = FPMATH_SSE;
          }
        else
          ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
      }
    else
      error ("bad value (%s) for %sfpmath=%s %s",
             ix86_fpmath_string, prefix, suffix, sw);
  }
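/* Usage note (illustrative): -mfpmath=sse without SSE enabled warns and
   falls back to 387 arithmetic, while -mfpmath=both with SSE and the
   387 available sets FPMATH_SSE | FPMATH_387 so either unit may be
   chosen per value.  */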
/* If the i387 is disabled, then do not return values in it.  */
if (!TARGET_80387)
  target_flags &= ~MASK_FLOAT_RETURNS;

/* Use external vectorized library in vectorizing intrinsics.  */
if (ix86_veclibabi_string)
  {
    if (strcmp (ix86_veclibabi_string, "svml") == 0)
      ix86_veclib_handler = ix86_veclibabi_svml;
    else if (strcmp (ix86_veclibabi_string, "acml") == 0)
      ix86_veclib_handler = ix86_veclibabi_acml;
    else
      error ("unknown vectorization library ABI type (%s) for "
             "%sveclibabi=%s %s", ix86_veclibabi_string,
             prefix, suffix, sw);
  }

if ((!USE_IX86_FRAME_POINTER
     || (x86_accumulate_outgoing_args & ix86_tune_mask))
    && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
    && !optimize_size)
  target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

/* ??? Unwind info is not correct around the CFG unless either a frame
   pointer is present or M_A_O_A is set.  Fixing this requires rewriting
   unwind info generation to be aware of the CFG and propagating states
   around edges.  */
if ((flag_unwind_tables || flag_asynchronous_unwind_tables
     || flag_exceptions || flag_non_call_exceptions)
    && flag_omit_frame_pointer
    && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
  {
    if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      warning (0, "unwind tables currently require either a frame pointer "
               "or %saccumulate-outgoing-args%s for correctness",
               prefix, suffix);
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  }

/* If stack probes are required, the space used for large function
   arguments on the stack must also be probed, so enable
   -maccumulate-outgoing-args so this happens in the prologue.  */
if (TARGET_STACK_PROBE
    && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
  {
    if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      warning (0, "stack probing requires %saccumulate-outgoing-args%s "
               "for correctness", prefix, suffix);
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  }

/* For sane SSE instruction set generation we need fcomi instruction.
   It is safe to enable all CMOVE instructions.  */

/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
{
  char *p;
  ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
  p = strchr (internal_label_prefix, 'X');
  internal_label_prefix_len = p - internal_label_prefix;
  *p = '\0';
}

/* When scheduling description is not available, disable scheduler pass
   so it won't slow down the compilation and make x87 code slower.  */
if (!TARGET_SCHEDULE)
  flag_schedule_insns_after_reload = flag_schedule_insns = 0;

if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
  set_param_value ("simultaneous-prefetches",
                   ix86_cost->simultaneous_prefetches);
if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
  set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
  set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
  set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);

/* Enable software prefetching at -O3 for CPUs where prefetching is
   helpful.  */
if (flag_prefetch_loop_arrays < 0
    && HAVE_prefetch
    && optimize >= 3
    && software_prefetching_beneficial_p ())
  flag_prefetch_loop_arrays = 1;

/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
   can be optimized to ap = __builtin_next_arg (0).  */
if (!TARGET_64BIT)
  targetm.expand_builtin_va_start = NULL;

if (TARGET_64BIT)
  {
    ix86_gen_leave = gen_leave_rex64;
    ix86_gen_add3 = gen_adddi3;
    ix86_gen_sub3 = gen_subdi3;
    ix86_gen_sub3_carry = gen_subdi3_carry;
    ix86_gen_one_cmpl2 = gen_one_cmpldi2;
    ix86_gen_monitor = gen_sse3_monitor64;
    ix86_gen_andsp = gen_anddi3;
    ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
    ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
    ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
  }
else
  {
    ix86_gen_leave = gen_leave;
    ix86_gen_add3 = gen_addsi3;
    ix86_gen_sub3 = gen_subsi3;
    ix86_gen_sub3_carry = gen_subsi3_carry;
    ix86_gen_one_cmpl2 = gen_one_cmplsi2;
    ix86_gen_monitor = gen_sse3_monitor;
    ix86_gen_andsp = gen_andsi3;
    ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
    ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
    ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
  }
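/* Usage sketch (illustrative, not from the original sources): keeping
   these expanders behind function pointers lets later code stay
   word-size agnostic, e.g.

     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx, off));

   generates an adddi3 insn on 64-bit targets and an addsi3 insn on
   32-bit ones; "off" here stands for whatever offset rtx the caller
   built.  */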
/* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
if (!TARGET_64BIT)
  target_flags |= MASK_CLD & ~target_flags_explicit;

if (!TARGET_64BIT && flag_pic)
  {
    if (flag_fentry > 0)
      sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
    flag_fentry = 0;
  }
if (flag_fentry < 0)
  {
#if defined(PROFILE_BEFORE_PROLOGUE)
    flag_fentry = 1;
#else
    flag_fentry = 0;
#endif
  }

/* Save the initial options in case the user applies function-specific
   options.  */
target_option_default_node = target_option_current_node
  = build_target_option_node ();
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}

/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
        fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
        call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
        call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
        if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
            && call_used_regs[i])
          SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
        reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
        reg_names[i] = "";
    }
}

/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->fpmath = ix86_fpmath;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->target_flags_explicit = target_flags_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->fpmath == ix86_fpmath);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
  target_flags_explicit = ptr->target_flags_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
        ix86_arch_features[i]
          = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
        ix86_tune_features[i]
          = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}

/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
                              struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
                          NULL, NULL, NULL, false);

  fprintf (file, "%*sarch = %d (%s)\n",
           indent, "",
           ptr->arch,
           ((ptr->arch < TARGET_CPU_DEFAULT_max)
            ? cpu_names[ptr->arch]
            : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
           indent, "",
           ptr->tune,
           ((ptr->tune < TARGET_CPU_DEFAULT_max)
            ? cpu_names[ptr->tune]
            : "<unknown>"));

  fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
           (ptr->fpmath & FPMATH_387) ? ", 387" : "",
           (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld", OPT_mcld, MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
                  OPT_mfancy_math_387,
                  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp", OPT_mieee_fp, MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
                   OPT_minline_all_stringops,
                   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
                   OPT_minline_stringops_dynamically,
                   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
                  OPT_mno_align_stringops,
                  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip", OPT_mrecip, MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
        if (TREE_VALUE (args)
            && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
                                                     p_strings))
          ret = false;

      return ret;
    }
  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string = NULL;
      size_t len, opt_len;
      int opt = 0;
      bool opt_set_p;
      char ch;
      unsigned i;
      bool found = false;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
        {
          *comma = '\0';
          len = comma - next_optstr;
          next_optstr = comma + 1;
        }
      else
        {
          len = strlen (p);
          next_optstr = NULL;
        }

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
        {
          opt_set_p = false;
          p += 3;
          len -= 3;
        }
      else
        opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
        {
          type = attrs[i].type;
          opt_len = attrs[i].len;
          if (ch == attrs[i].string[0]
              && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
              && memcmp (p, attrs[i].string, opt_len) == 0)
            {
              opt = attrs[i].opt;
              mask = attrs[i].mask;
              opt_string = attrs[i].string;
              found = true;
              break;
            }
        }

      /* Process the option.  */
      if (!found)
        {
          error ("attribute(target(\"%s\")) is unknown", orig_p);
          ret = false;
        }
      else if (type == ix86_opt_isa)
        ix86_handle_option (opt, p, opt_set_p);
      else if (type == ix86_opt_yes || type == ix86_opt_no)
        {
          if (type == ix86_opt_no)
            opt_set_p = !opt_set_p;

          if (opt_set_p)
            target_flags |= mask;
          else
            target_flags &= ~mask;
        }
      else if (type == ix86_opt_str)
        {
          if (p_strings[opt])
            {
              error ("option(\"%s\") was already specified", opt_string);
              ret = false;
            }
          else
            p_strings[opt] = xstrdup (p + opt_len);
        }
    }

  return ret;
}
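/* Example of what the parser above accepts (illustrative only):

     __attribute__((target ("sse4.2,arch=core2,no-fancy-math-387")))

   "sse4.2" is an ISA option handled via ix86_handle_option, "arch=" is
   a string option whose value is stashed in p_strings[], and the "no-"
   prefix inverts opt_set_p for mask options such as fancy-math-387.  */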
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  const char *orig_fpmath_string = ix86_fpmath_string;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->ix86_isa_flags
      || target_flags != def->target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
         and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
        ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
        ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
        ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
        ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
        ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
      else if (!TARGET_64BIT && TARGET_SSE)
        ix86_fpmath_string = "sse,387";

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
         #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      ix86_fpmath_string = orig_fpmath_string;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
        if (option_strings[i])
          free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
                               tree ARG_UNUSED (name),
                               tree args,
                               int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (!new_target)
    ret = false;
  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
        DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));

  return ret;
}

/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;
  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;
  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
         function can inline an SSE2 function, but an SSE2 function can't
         inline an SSE4 function.  */
      if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
          != callee_opts->ix86_isa_flags)
        ret = false;
      /* See if we have the same non-isa options.  */
      else if (caller_opts->target_flags != callee_opts->target_flags)
        ret = false;
      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
        ret = false;
      else if (caller_opts->tune != callee_opts->tune)
        ret = false;
      else if (caller_opts->fpmath != callee_opts->fpmath)
        ret = false;
      else if (caller_opts->branch_cost != callee_opts->branch_cost)
        ret = false;
      else
        ret = true;
    }

  return ret;
}
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
                       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
                       : NULL_TREE);
      tree new_tree = (fndecl
                       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
                       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
        ;
      else if (new_tree)
        {
          cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
          target_reinit ();
        }
      else if (old_tree)
        {
          struct cl_target_option *def
            = TREE_TARGET_OPTION (target_option_current_node);

          cl_target_option_restore (def);
          target_reinit ();
        }
    }
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
        return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
        return true;
    }

  return false;
}

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
  ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
                           unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA: sname = ".ldata"; break;
        case SECCAT_DATA_REL: sname = ".ldata.rel"; break;
        case SECCAT_DATA_REL_LOCAL: sname = ".ldata.rel.local"; break;
        case SECCAT_DATA_REL_RO: sname = ".ldata.rel.ro"; break;
        case SECCAT_DATA_REL_RO_LOCAL: sname = ".ldata.rel.ro.local"; break;
        case SECCAT_BSS:
          sname = ".lbss";
          flags |= SECTION_BSS;
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          sname = ".lrodata";
          flags = 0;
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for the best.  */
          break;
        }
      if (sname)
        {
          /* We might get called with string constants, but get_named_section
             doesn't like them as they are not DECLs.  Also, we need to set
             flags in that case.  */
          if (!DECL_P (decl))
            return get_section (sname, flags, NULL);
          return get_named_section (decl, sname, reloc);
        }
    }
  return default_elf_select_section (decl, reloc, align);
}
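/* Example (illustrative): under -mcmodel=medium, a writable global
   larger than -mlarge-data-threshold whose category is SECCAT_DATA_REL
   is placed in ".ldata.rel", which the linker may locate outside the
   2GB range addressable by the small code model.  */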
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA:
        case SECCAT_DATA_REL:
        case SECCAT_DATA_REL_LOCAL:
        case SECCAT_DATA_REL_RO:
        case SECCAT_DATA_REL_RO_LOCAL:
          prefix = one_only ? ".ld" : ".ldata";
          break;
        case SECCAT_BSS:
          prefix = one_only ? ".lb" : ".lbss";
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          prefix = one_only ? ".lr" : ".lrodata";
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for the best.  */
          break;
        }
      if (prefix)
        {
          const char *name, *linkonce;
          char *string;

          name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
          name = targetm.strip_name_encoding (name);

          /* If we're using one_only, then there needs to be a .gnu.linkonce
             prefix to the section name.  */
          linkonce = one_only ? ".gnu.linkonce" : "";

          string = ACONCAT ((linkonce, prefix, ".", name, NULL));

          DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
          return;
        }
    }
  default_unique_section (decl, reloc);
}

#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
           size, align / BITS_PER_UNIT);
}
#endif

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
static void
ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on the TARGET_64BIT
     that is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line
     option specifying them, we will set the defaults in
     ix86_option_override_internal.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;

  /* For -O2 and beyond, turn on -fzee for x86_64 target.  */
  if (level > 1)
    flag_zee = 2;

  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
  flag_vect_cost_model = 1;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  For example, if
     we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                           cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
        return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
         disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
          && ix86_function_type_abi (type) == SYSV_ABI)
        return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
         call-clobbered register for the address of the target function.
         Make sure that all such registers are not used for passing
         parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
          || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
        {
          if (ix86_function_regparm (type, NULL) >= 3)
            {
              /* ??? Need to count the actual number of registers to be used,
                 not the possible number of registers.  Fix later.  */
              return false;
            }
        }
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
                             tree args,
                             int flags ATTRIBUTE_UNUSED,
                             bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and regparm attributes are not compatible");

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("regparm and thiscall attributes are not compatible");

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qE attribute requires an integer constant argument",
                   name);
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
                   name, REGPARM_MAX);
          *no_add_attrs = true;
        }

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
           && TREE_CODE (*node) != METHOD_TYPE)
          || ix86_function_type_abi (*node) != MS_ABI)
        warning (OPT_Wattributes, "%qE attribute ignored",
                 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and thiscall attributes are not compatible");
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and thiscall attributes are not compatible");
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("cdecl and thiscall attributes are not compatible");
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
        warning (OPT_Wattributes, "%qE attribute is used for non-class method",
                 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("cdecl and thiscall attributes are not compatible");
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
          != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched thiscall types.  */
  if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
            ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);

  regparm = ix86_regparm;
  attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
      return regparm;
    }

  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
    return 2;
  if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local)
        {
          int local_regparm, globals = 0, regno;

          /* Make sure no regparm register is taken by a
             fixed register variable.  */
          for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
            if (fixed_regs[local_regparm])
              break;

          /* We don't want to use regparm(3) for nested functions as
             these use a static chain pointer in the third argument.  */
          if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
            local_regparm = 2;

          /* Each fixed register usage increases register pressure,
             so fewer registers should be used for argument passing.
             This functionality can be overridden by an explicit
             regparm value.  */
          for (regno = 0; regno <= DI_REG; regno++)
            if (fixed_regs[regno])
              globals++;

          local_regparm
            = globals < local_regparm ? local_regparm - globals : 0;

          if (local_regparm > regparm)
            regparm = local_regparm;
        }
    }

  return regparm;
}
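/* Illustration (not from the original file): with regparm == 3 the
   first three integral arguments travel in %eax, %edx and %ecx, so for

     int __attribute__((regparm(3))) f (int a, int b, int c);

   a arrives in %eax, b in %edx and c in %ecx; any further arguments
   are pushed on the stack as usual.  */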
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          if (warn)
            {
              if (decl)
                error ("Calling %qD with attribute sseregparm without "
                       "SSE/SSE2 enabled", decl);
              else
                error ("Calling %qT with attribute sseregparm without "
                       "SSE/SSE2 enabled", type);
            }
          return 0;
        }

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local)
        return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall and fastcall functions will pop the stack if not
         variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
          || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
          || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
        rtd = 1;

      if (rtd && ! stdarg_p (funtype))
        return size;
    }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
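/* Worked example (illustrative): a 32-bit function declared as

     void __attribute__((stdcall)) g (int, int, int);

   has 12 bytes of stack arguments and no variable args, so this hook
   returns 12 and the callee pops them with "ret $12"; for plain cdecl
   it returns 0 and the caller adjusts %esp after the call.  */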
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
                || (TARGET_MMX && MMX_REGNO_P (regno)
                    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
                || (TARGET_SSE && SSE_REGNO_P (regno)
                    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
        return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
        return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
                   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}

/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl, depending on the
   ABI used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (call_abi == MS_ABI)
    return 32;
  return 0;
}

/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (TARGET_64BIT && fntype != NULL)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
        {
          if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
            abi = MS_ABI;
        }
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
        abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}

static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
        error_at (DECL_SOURCE_LOCATION (fn),
                  "ms_hook_prologue is not compatible with nested function");
      else
        return true;
    }
  return false;
}

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun || ! TARGET_64BIT)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
                                tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
        {
          /* leaq [%rsp + 0], %rsp  */
          asm_fprintf (asm_out_file, ASM_BYTE
                       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
        }
      else
        {
          /* movl.s %edi, %edi
             push   %ebp
             movl.s %esp, %ebp  */
          asm_fprintf (asm_out_file, ASM_BYTE
                       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
        }
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   CONDITIONAL_REGISTER_USAGE for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* MS and SYSV ABI have different set of call used registers.  Avoid expensive
   re-initialization of init_regs each time we switch function context since
   this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT &&
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,	/* tree ptr for function decl */
                      rtx libname,	/* SYMBOL_REF of library name or 0 */
                      tree fndecl)
{
  struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    cum->call_abi = ix86_function_abi (fndecl);
  else
    cum->call_abi = ix86_function_type_abi (fntype);
  /* Set up the number of registers to use for passing arguments.  */

  if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
           "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    cum->nregs = (cum->call_abi == SYSV_ABI
                  ? X86_64_REGPARM_MAX
                  : X86_64_MS_REGPARM_MAX);
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
        cum->sse_nregs = (cum->call_abi == SYSV_ABI
                          ? X86_64_SSE_REGPARM_MAX
                          : X86_64_MS_SSE_REGPARM_MAX);
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive,
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))
                      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
        {
          cum->nregs = 0;
          cum->sse_nregs = 0;
          cum->mmx_nregs = 0;
          return;
        }

      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
      if (fntype)
        {
          if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
            {
              cum->nregs = 1;
              cum->fastcall = 1; /* Same first register as in fastcall.  */
            }
          else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
            {
              cum->nregs = 2;
              cum->fastcall = 1;
            }
          else
            cum->nregs = ix86_function_regparm (fntype, fndecl);
        }

      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              {
                if (size == 32 && !TARGET_AVX)
                  {
                    static bool warnedavx;

                    if (cum && cum->warn_avx && !warnedavx)
                      {
                        warnedavx = true;
                        warning (0, "AVX vector argument without AVX "
                                 "enabled changes the ABI");
                      }
                    return TYPE_MODE (type);
                  }
                else
                  return mode;
              }

          gcc_unreachable ();
        }
    }

  return mode;
}
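/* Example (illustrative): with SSE enabled,

     typedef float v4sf __attribute__((vector_size (16)));

   has 4 SFmode subparts, so the loop above lands on V4SFmode and the
   argument is given its SSE-register ABI slot even when generic code
   would otherwise have fallen back to a non-vector mode.  */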
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
                     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
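/* For reference (illustrative): the BLKmode path above yields

     (parallel [(expr_list (reg:MODE REGNO) (const_int 0))])

   a one-element PARALLEL telling the middle end that the register
   carries the bytes starting at offset 0 of the value.  */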
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
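/* Example of the rules in action (illustrative): classifying

     struct s { double d; int i; };

   gives the first eightbyte SSE class (the double) and the second
   INTEGER class (the int), so the struct travels in one SSE register
   plus one general-purpose register.  */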
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the field in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal memory class, so handle it as a special case.  */
      if (!words)
        {
          classes[0] = X86_64_NO_CLASS;
          return 1;
        }
      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
          /* And now merge the fields of structure.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
                      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_low_cst (DECL_SIZE (field), 0)
                                + 63) / 8 / 8; i++)
                        classes[i] =
                          merge_classes (X86_64_INTEGER_CLASS,
                                         classes[i]);
                    }
                  else
                    {
                      int pos;

                      type = TREE_TYPE (field);

                      /* Flexible array member is ignored.  */
                      if (TYPE_MODE (type) == BLKmode
                          && TREE_CODE (type) == ARRAY_TYPE
                          && TYPE_SIZE (type) == NULL_TREE
                          && TYPE_DOMAIN (type) != NULL_TREE
                          && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
                              == NULL_TREE))
                        {
                          static bool warned;

                          if (!warned && warn_psabi)
                            {
                              warned = true;
                              inform (input_location,
                                      "The ABI of passing struct with"
                                      " a flexible array member has"
                                      " changed in GCC 4.4");
                            }
                          continue;
                        }
                      num = classify_argument (TYPE_MODE (type), type,
                                               subclasses,
                                               (int_bit_position (field)
                                                + bit_offset) % 256);
                      if (!num)
                        return 0;
                      pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
                      for (i = 0; i < num && (i + pos) < words; i++)
                        classes[i + pos] =
                          merge_classes (subclasses[i], classes[i + pos]);
                    }
                }
            }
          break;

        case ARRAY_TYPE:
          /* Arrays are handled as small records.  */
          {
            int num;
            num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                     TREE_TYPE (type), subclasses, bit_offset);
            if (!num)
              return 0;

            /* The partial classes are now full classes.  */
            if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
              subclasses[0] = X86_64_SSE_CLASS;
            if (subclasses[0] == X86_64_INTEGERSI_CLASS
                && !((bit_offset % 64) == 0 && bytes == 4))
              subclasses[0] = X86_64_INTEGER_CLASS;

            for (i = 0; i < words; i++)
              classes[i] = subclasses[i % num];

            break;
          }
5597 case QUAL_UNION_TYPE:
5598 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5600 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5602 if (TREE_CODE (field) == FIELD_DECL)
5606 if (TREE_TYPE (field) == error_mark_node)
5609 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5610 TREE_TYPE (field), subclasses,
5614 for (i = 0; i < num; i++)
5615 classes[i] = merge_classes (subclasses[i], classes[i]);
5626 /* When size > 16 bytes, if the first one isn't
5627 X86_64_SSE_CLASS or any other ones aren't
5628 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5630 if (classes[0] != X86_64_SSE_CLASS)
5633 for (i = 1; i < words; i++)
5634 if (classes[i] != X86_64_SSEUP_CLASS)
5638 /* Final merger cleanup. */
5639 for (i = 0; i < words; i++)
5641 /* If one class is MEMORY, everything should be passed in memory. */
5643 if (classes[i] == X86_64_MEMORY_CLASS)
5646 /* The X86_64_SSEUP_CLASS should be always preceded by
5647 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5648 if (classes[i] == X86_64_SSEUP_CLASS
5649 && classes[i - 1] != X86_64_SSE_CLASS
5650 && classes[i - 1] != X86_64_SSEUP_CLASS)
5652 /* The first one should never be X86_64_SSEUP_CLASS. */
5653 gcc_assert (i != 0);
5654 classes[i] = X86_64_SSE_CLASS;
5657 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5658 everything should be passed in memory. */
5659 if (classes[i] == X86_64_X87UP_CLASS
5660 && (classes[i - 1] != X86_64_X87_CLASS))
5664 /* The first one should never be X86_64_X87UP_CLASS. */
5665 gcc_assert (i != 0);
5666 if (!warned && warn_psabi)
5669 inform (input_location,
5670 "The ABI of passing union with long double"
5671 " has changed in GCC 4.4");
5679 /* Compute alignment needed. We align all types to natural boundaries with
5680 the exception of XFmode, which is aligned to 64 bits. */
5681 if (mode != VOIDmode && mode != BLKmode)
5683 int mode_alignment = GET_MODE_BITSIZE (mode);
5686 mode_alignment = 128;
5687 else if (mode == XCmode)
5688 mode_alignment = 256;
5689 if (COMPLEX_MODE_P (mode))
5690 mode_alignment /= 2;
5691 /* Misaligned fields are always returned in memory. */
5692 if (bit_offset % mode_alignment)
5696 /* For V1xx modes, just use the base mode. */
5697 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5698 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5699 mode = GET_MODE_INNER (mode);
5701 /* Classification of atomic types. */
5706 classes[0] = X86_64_SSE_CLASS;
5709 classes[0] = X86_64_SSE_CLASS;
5710 classes[1] = X86_64_SSEUP_CLASS;
5720 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5724 classes[0] = X86_64_INTEGERSI_CLASS;
5727 else if (size <= 64)
5729 classes[0] = X86_64_INTEGER_CLASS;
5732 else if (size <= 64+32)
5734 classes[0] = X86_64_INTEGER_CLASS;
5735 classes[1] = X86_64_INTEGERSI_CLASS;
5738 else if (size <= 64+64)
5740 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5748 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5752 /* OImode shouldn't be used directly. */
5757 if (!(bit_offset % 64))
5758 classes[0] = X86_64_SSESF_CLASS;
5760 classes[0] = X86_64_SSE_CLASS;
5763 classes[0] = X86_64_SSEDF_CLASS;
5766 classes[0] = X86_64_X87_CLASS;
5767 classes[1] = X86_64_X87UP_CLASS;
5770 classes[0] = X86_64_SSE_CLASS;
5771 classes[1] = X86_64_SSEUP_CLASS;
5774 classes[0] = X86_64_SSE_CLASS;
5775 if (!(bit_offset % 64))
5781 if (!warned && warn_psabi)
5784 inform (input_location,
5785 "The ABI of passing structure with complex float"
5786 " member has changed in GCC 4.4");
5788 classes[1] = X86_64_SSESF_CLASS;
5792 classes[0] = X86_64_SSEDF_CLASS;
5793 classes[1] = X86_64_SSEDF_CLASS;
5796 classes[0] = X86_64_COMPLEX_X87_CLASS;
5799 /* This mode is larger than 16 bytes. */
5807 classes[0] = X86_64_SSE_CLASS;
5808 classes[1] = X86_64_SSEUP_CLASS;
5809 classes[2] = X86_64_SSEUP_CLASS;
5810 classes[3] = X86_64_SSEUP_CLASS;
5818 classes[0] = X86_64_SSE_CLASS;
5819 classes[1] = X86_64_SSEUP_CLASS;
5827 classes[0] = X86_64_SSE_CLASS;
5833 gcc_assert (VECTOR_MODE_P (mode));
5838 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5840 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5841 classes[0] = X86_64_INTEGERSI_CLASS;
5843 classes[0] = X86_64_INTEGER_CLASS;
5844 classes[1] = X86_64_INTEGER_CLASS;
5845 return 1 + (bytes > 8);
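/* Worked example (illustrative, not from the original source): for
   struct { double d; int i; } the first eightbyte classifies as
   X86_64_SSEDF_CLASS and the second as X86_64_INTEGERSI_CLASS, so
   classify_argument returns 2 and the struct travels in one SSE
   register plus one integer register. A 32-byte __m256 classifies as
   { SSE, SSEUP, SSEUP, SSEUP } and returns 4.  */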
5849 /* Examine the argument and set the number of registers required in each
5850 class. Return 0 iff the parameter should be passed in memory. */
5852 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5853 int *int_nregs, int *sse_nregs)
5855 enum x86_64_reg_class regclass[MAX_CLASSES];
5856 int n = classify_argument (mode, type, regclass, 0);
5862 for (n--; n >= 0; n--)
5863 switch (regclass[n])
5865 case X86_64_INTEGER_CLASS:
5866 case X86_64_INTEGERSI_CLASS:
5869 case X86_64_SSE_CLASS:
5870 case X86_64_SSESF_CLASS:
5871 case X86_64_SSEDF_CLASS:
5874 case X86_64_NO_CLASS:
5875 case X86_64_SSEUP_CLASS:
5877 case X86_64_X87_CLASS:
5878 case X86_64_X87UP_CLASS:
5882 case X86_64_COMPLEX_X87_CLASS:
5883 return in_return ? 2 : 0;
5884 case X86_64_MEMORY_CLASS:
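/* Example of the counting above (a sketch, not original text): for
   struct { double d; int i; }, examine_argument sets *int_nregs = 1
   and *sse_nregs = 1; for __int128 it sets *int_nregs = 2; and for a
   type classified as MEMORY it returns 0, i.e. pass on the stack.  */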
5890 /* Construct container for the argument used by GCC interface. See
5891 FUNCTION_ARG for the detailed description. */
5894 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5895 const_tree type, int in_return, int nintregs, int nsseregs,
5896 const int *intreg, int sse_regno)
5898 /* The following variables hold the static issued_error state. */
5899 static bool issued_sse_arg_error;
5900 static bool issued_sse_ret_error;
5901 static bool issued_x87_ret_error;
5903 enum machine_mode tmpmode;
5905 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5906 enum x86_64_reg_class regclass[MAX_CLASSES];
5910 int needed_sseregs, needed_intregs;
5911 rtx exp[MAX_CLASSES];
5914 n = classify_argument (mode, type, regclass, 0);
5917 if (!examine_argument (mode, type, in_return, &needed_intregs,
5920 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5923 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5924 some less clueful developer tries to use floating-point anyway. */
5925 if (needed_sseregs && !TARGET_SSE)
5929 if (!issued_sse_ret_error)
5931 error ("SSE register return with SSE disabled");
5932 issued_sse_ret_error = true;
5935 else if (!issued_sse_arg_error)
5937 error ("SSE register argument with SSE disabled");
5938 issued_sse_arg_error = true;
5943 /* Likewise, error if the ABI requires us to return values in the
5944 x87 registers and the user specified -mno-80387. */
5945 if (!TARGET_80387 && in_return)
5946 for (i = 0; i < n; i++)
5947 if (regclass[i] == X86_64_X87_CLASS
5948 || regclass[i] == X86_64_X87UP_CLASS
5949 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5951 if (!issued_x87_ret_error)
5953 error ("x87 register return with x87 disabled");
5954 issued_x87_ret_error = true;
5959 /* First construct simple cases. Avoid SCmode, since we want to use
5960 a single register to pass this type. */
5961 if (n == 1 && mode != SCmode)
5962 switch (regclass[0])
5964 case X86_64_INTEGER_CLASS:
5965 case X86_64_INTEGERSI_CLASS:
5966 return gen_rtx_REG (mode, intreg[0]);
5967 case X86_64_SSE_CLASS:
5968 case X86_64_SSESF_CLASS:
5969 case X86_64_SSEDF_CLASS:
5970 if (mode != BLKmode)
5971 return gen_reg_or_parallel (mode, orig_mode,
5972 SSE_REGNO (sse_regno));
5974 case X86_64_X87_CLASS:
5975 case X86_64_COMPLEX_X87_CLASS:
5976 return gen_rtx_REG (mode, FIRST_STACK_REG);
5977 case X86_64_NO_CLASS:
5978 /* Zero sized array, struct or class. */
5983 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5984 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5985 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5987 && regclass[0] == X86_64_SSE_CLASS
5988 && regclass[1] == X86_64_SSEUP_CLASS
5989 && regclass[2] == X86_64_SSEUP_CLASS
5990 && regclass[3] == X86_64_SSEUP_CLASS
5992 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5995 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5996 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5997 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5998 && regclass[1] == X86_64_INTEGER_CLASS
5999 && (mode == CDImode || mode == TImode || mode == TFmode)
6000 && intreg[0] + 1 == intreg[1])
6001 return gen_rtx_REG (mode, intreg[0]);
6003 /* Otherwise figure out the entries of the PARALLEL. */
6004 for (i = 0; i < n; i++)
6008 switch (regclass[i])
6010 case X86_64_NO_CLASS:
6012 case X86_64_INTEGER_CLASS:
6013 case X86_64_INTEGERSI_CLASS:
6014 /* Merge TImodes on aligned occasions here too. */
6015 if (i * 8 + 8 > bytes)
6016 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6017 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6021 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6022 if (tmpmode == BLKmode)
6024 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6025 gen_rtx_REG (tmpmode, *intreg),
6029 case X86_64_SSESF_CLASS:
6030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6031 gen_rtx_REG (SFmode,
6032 SSE_REGNO (sse_regno)),
6036 case X86_64_SSEDF_CLASS:
6037 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6038 gen_rtx_REG (DFmode,
6039 SSE_REGNO (sse_regno)),
6043 case X86_64_SSE_CLASS:
6051 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6061 && regclass[1] == X86_64_SSEUP_CLASS
6062 && regclass[2] == X86_64_SSEUP_CLASS
6063 && regclass[3] == X86_64_SSEUP_CLASS);
6070 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6071 gen_rtx_REG (tmpmode,
6072 SSE_REGNO (sse_regno)),
6081 /* Empty aligned struct, union or class. */
6085 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6086 for (i = 0; i < nexps; i++)
6087 XVECEXP (ret, 0, i) = exp [i];
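/* Illustrative result (an assumption drawn from the code above, not
   original text): for struct { double d; int i; } with %xmm0 and %rdi
   available, the PARALLEL built here is equivalent to

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:SI di)   (const_int 8))])

   i.e. the double at byte offset 0 in an SSE register and the int at
   byte offset 8 in an integer register.  */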
6091 /* Update the data in CUM to advance over an argument of mode MODE
6092 and data type TYPE. (TYPE is null for libcalls where that information
6093 may not be available.) */
6096 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6097 const_tree type, HOST_WIDE_INT bytes,
6098 HOST_WIDE_INT words)
6114 cum->words += words;
6115 cum->nregs -= words;
6116 cum->regno += words;
6118 if (cum->nregs <= 0)
6126 /* OImode shouldn't be used directly. */
6130 if (cum->float_in_sse < 2)
6133 if (cum->float_in_sse < 1)
6150 if (!type || !AGGREGATE_TYPE_P (type))
6152 cum->sse_words += words;
6153 cum->sse_nregs -= 1;
6154 cum->sse_regno += 1;
6155 if (cum->sse_nregs <= 0)
6169 if (!type || !AGGREGATE_TYPE_P (type))
6171 cum->mmx_words += words;
6172 cum->mmx_nregs -= 1;
6173 cum->mmx_regno += 1;
6174 if (cum->mmx_nregs <= 0)
6185 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6186 const_tree type, HOST_WIDE_INT words, bool named)
6188 int int_nregs, sse_nregs;
6190 /* Unnamed 256bit vector mode parameters are passed on stack. */
6191 if (!named && VALID_AVX256_REG_MODE (mode))
6194 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6195 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6197 cum->nregs -= int_nregs;
6198 cum->sse_nregs -= sse_nregs;
6199 cum->regno += int_nregs;
6200 cum->sse_regno += sse_nregs;
6204 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6205 cum->words = (cum->words + align - 1) & ~(align - 1);
6206 cum->words += words;
6211 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6212 HOST_WIDE_INT words)
6214 /* Otherwise, this should be passed indirectly. */
6215 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6217 cum->words += words;
6225 /* Update the data in CUM to advance over an argument of mode MODE and
6226 data type TYPE. (TYPE is null for libcalls where that information
6227 may not be available.) */
6230 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6231 const_tree type, bool named)
6233 HOST_WIDE_INT bytes, words;
6235 if (mode == BLKmode)
6236 bytes = int_size_in_bytes (type);
6238 bytes = GET_MODE_SIZE (mode);
6239 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6242 mode = type_natural_mode (type, NULL);
6244 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6245 function_arg_advance_ms_64 (cum, bytes, words);
6246 else if (TARGET_64BIT)
6247 function_arg_advance_64 (cum, mode, type, words, named);
6249 function_arg_advance_32 (cum, mode, type, bytes, words);
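/* Example of the bookkeeping (illustrative): on 64-bit SysV, for
   f (int a, double b), advancing over A consumes one GPR
   (cum->nregs 6 -> 5, cum->regno 0 -> 1) and advancing over B one SSE
   register (cum->sse_nregs 8 -> 7, cum->sse_regno 0 -> 1); cum->words
   only grows once the registers for a class are exhausted.  */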
6252 /* Define where to put the arguments to a function.
6253 Value is zero to push the argument on the stack,
6254 or a hard register in which to store the argument.
6256 MODE is the argument's machine mode.
6257 TYPE is the data type of the argument (as a tree).
6258 This is null for libcalls where that information may
6260 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6261 the preceding args and about the function being called.
6262 NAMED is nonzero if this argument is a named parameter
6263 (otherwise it is an extra parameter matching an ellipsis). */
6266 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6267 enum machine_mode orig_mode, const_tree type,
6268 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6270 static bool warnedsse, warnedmmx;
6272 /* Avoid the AL settings for the Unix64 ABI. */
6273 if (mode == VOIDmode)
6289 if (words <= cum->nregs)
6291 int regno = cum->regno;
6293 /* Fastcall allocates the first two DWORD (SImode) or
6294 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6300 || (type && AGGREGATE_TYPE_P (type)))
6303 /* ECX not EAX is the first allocated register. */
6304 if (regno == AX_REG)
6307 return gen_rtx_REG (mode, regno);
6312 if (cum->float_in_sse < 2)
6315 if (cum->float_in_sse < 1)
6319 /* In 32bit, we pass TImode in xmm registers. */
6326 if (!type || !AGGREGATE_TYPE_P (type))
6328 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6331 warning (0, "SSE vector argument without SSE enabled "
6335 return gen_reg_or_parallel (mode, orig_mode,
6336 cum->sse_regno + FIRST_SSE_REG);
6341 /* OImode shouldn't be used directly. */
6350 if (!type || !AGGREGATE_TYPE_P (type))
6353 return gen_reg_or_parallel (mode, orig_mode,
6354 cum->sse_regno + FIRST_SSE_REG);
6364 if (!type || !AGGREGATE_TYPE_P (type))
6366 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6369 warning (0, "MMX vector argument without MMX enabled "
6373 return gen_reg_or_parallel (mode, orig_mode,
6374 cum->mmx_regno + FIRST_MMX_REG);
6383 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6384 enum machine_mode orig_mode, const_tree type, bool named)
6386 /* Handle a hidden AL argument containing number of registers
6387 for varargs x86-64 functions. */
6388 if (mode == VOIDmode)
6389 return GEN_INT (cum->maybe_vaarg
6390 ? (cum->sse_nregs < 0
6391 ? X86_64_SSE_REGPARM_MAX
6406 /* Unnamed 256bit vector mode parameters are passed on stack. */
6412 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6414 &x86_64_int_parameter_registers [cum->regno],
6419 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6420 enum machine_mode orig_mode, bool named,
6421 HOST_WIDE_INT bytes)
6425 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6426 We use a value of -2 to specify that the current function call is MS ABI. */
6427 if (mode == VOIDmode)
6428 return GEN_INT (-2);
6430 /* If we've run out of registers, it goes on the stack. */
6431 if (cum->nregs == 0)
6434 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6436 /* Only floating point modes are passed in anything but integer regs. */
6437 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6440 regno = cum->regno + FIRST_SSE_REG;
6445 /* Unnamed floating parameters are passed in both the
6446 SSE and integer registers. */
6447 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6448 t2 = gen_rtx_REG (mode, regno);
6449 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6450 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6451 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6454 /* Handle aggregate types passed in registers. */
6455 if (orig_mode == BLKmode)
6457 if (bytes > 0 && bytes <= 8)
6458 mode = (bytes > 4 ? DImode : SImode);
6459 if (mode == BLKmode)
6463 return gen_reg_or_parallel (mode, orig_mode, regno);
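/* Illustrative summary of the MS ABI path above (not original text):
   each of the first four arguments gets one slot, integers in
   rcx/rdx/r8/r9 and SFmode/DFmode values in xmm0-xmm3; an unnamed
   float is returned as a PARALLEL naming both the SSE and the integer
   register for its slot; a BLKmode aggregate of 1, 2, 4, or 8 bytes
   is coerced to SImode/DImode and passed in the integer register,
   while larger aggregates have already been forced by
   ix86_pass_by_reference to be passed indirectly.  */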
6466 /* Return where to put the arguments to a function.
6467 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6469 MODE is the argument's machine mode. TYPE is the data type of the
6470 argument. It is null for libcalls where that information may not be
6471 available. CUM gives information about the preceding args and about
6472 the function being called. NAMED is nonzero if this argument is a
6473 named parameter (otherwise it is an extra parameter matching an
6477 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6478 const_tree type, bool named)
6480 enum machine_mode mode = omode;
6481 HOST_WIDE_INT bytes, words;
6483 if (mode == BLKmode)
6484 bytes = int_size_in_bytes (type);
6486 bytes = GET_MODE_SIZE (mode);
6487 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6489 /* To simplify the code below, represent vector types with a vector mode
6490 even if MMX/SSE are not active. */
6491 if (type && TREE_CODE (type) == VECTOR_TYPE)
6492 mode = type_natural_mode (type, cum);
6494 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6495 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6496 else if (TARGET_64BIT)
6497 return function_arg_64 (cum, mode, omode, type, named);
6499 return function_arg_32 (cum, mode, omode, type, bytes, words);
6502 /* A C expression that indicates when an argument must be passed by
6503 reference. If nonzero for an argument, a copy of that argument is
6504 made in memory and a pointer to the argument is passed instead of
6505 the argument itself. The pointer is passed in whatever way is
6506 appropriate for passing a pointer to that type. */
6509 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6510 enum machine_mode mode ATTRIBUTE_UNUSED,
6511 const_tree type, bool named ATTRIBUTE_UNUSED)
6513 /* See Windows x64 Software Convention. */
6514 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6516 int msize = (int) GET_MODE_SIZE (mode);
6519 /* Arrays are passed by reference. */
6520 if (TREE_CODE (type) == ARRAY_TYPE)
6523 if (AGGREGATE_TYPE_P (type))
6525 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6526 are passed by reference. */
6527 msize = int_size_in_bytes (type);
6531 /* __m128 is passed by reference. */
6533 case 1: case 2: case 4: case 8:
6539 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
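/* Example decisions under the Windows x64 rules above (illustrative):
     struct { char c[3]; }  (3 bytes)  -> by reference;
     struct { long long x; } (8 bytes) -> by value in a GPR;
     int a[4]  (array type)            -> by reference;
     __m128    (16 bytes)              -> by reference.  */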
6545 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6548 contains_aligned_value_p (const_tree type)
6550 enum machine_mode mode = TYPE_MODE (type);
6551 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6555 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6557 if (TYPE_ALIGN (type) < 128)
6560 if (AGGREGATE_TYPE_P (type))
6562 /* Walk the aggregates recursively. */
6563 switch (TREE_CODE (type))
6567 case QUAL_UNION_TYPE:
6571 /* Walk all the structure fields. */
6572 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6574 if (TREE_CODE (field) == FIELD_DECL
6575 && contains_aligned_value_p (TREE_TYPE (field)))
6582 /* Just for use if some language passes arrays by value. */
6583 if (contains_aligned_value_p (TREE_TYPE (type)))
6594 /* Gives the alignment boundary, in bits, of an argument with the
6595 specified mode and type. */
6598 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6603 /* Since the main variant type is used for the call, convert
6604 TYPE to its main variant. */
6605 type = TYPE_MAIN_VARIANT (type);
6606 align = TYPE_ALIGN (type);
6609 align = GET_MODE_ALIGNMENT (mode);
6610 if (align < PARM_BOUNDARY)
6611 align = PARM_BOUNDARY;
6612 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6613 natural boundaries. */
6614 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6616 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6617 make an exception for SSE modes since these require 128bit alignment.
6620 The handling here differs from field_alignment. ICC aligns MMX
6621 arguments to 4 byte boundaries, while structure fields are aligned
6622 to 8 byte boundaries. */
6625 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6626 align = PARM_BOUNDARY;
6630 if (!contains_aligned_value_p (type))
6631 align = PARM_BOUNDARY;
6634 if (align > BIGGEST_ALIGNMENT)
6635 align = BIGGEST_ALIGNMENT;
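/* Examples of the resulting boundaries (illustrative): on IA-32 a
   double argument stays at PARM_BOUNDARY (32 bits) per the i386 ABI,
   while an __m128 argument gets 128 bits; on x86-64 every argument is
   at least 64-bit aligned and vector arguments use their natural
   alignment, capped at BIGGEST_ALIGNMENT as above.  */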
6639 /* Return true if N is a possible register number of function value. */
6642 ix86_function_value_regno_p (const unsigned int regno)
6649 case FIRST_FLOAT_REG:
6650 /* TODO: The function should depend on current function ABI but
6651 builtins.c would need updating then. Therefore we use the default ABI. */
6653 if (TARGET_64BIT && ix86_abi == MS_ABI)
6655 return TARGET_FLOAT_RETURNS_IN_80387;
6661 if (TARGET_MACHO || TARGET_64BIT)
6669 /* Define how to find the value returned by a function.
6670 VALTYPE is the data type of the value (as a tree).
6671 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6672 otherwise, FUNC is 0. */
6675 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6676 const_tree fntype, const_tree fn)
6680 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6681 we normally prevent this case when mmx is not available. However
6682 some ABIs may require the result to be returned like DImode. */
6683 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6684 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6686 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6687 we prevent this case when sse is not available. However some ABIs
6688 may require the result to be returned like integer TImode. */
6689 else if (mode == TImode
6690 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6691 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6693 /* 32-byte vector modes in %ymm0. */
6694 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6695 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6697 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6698 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6699 regno = FIRST_FLOAT_REG;
6701 /* Most things go in %eax. */
6704 /* Override FP return register with %xmm0 for local functions when
6705 SSE math is enabled or for functions with sseregparm attribute. */
6706 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6708 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6709 if ((sse_level >= 1 && mode == SFmode)
6710 || (sse_level == 2 && mode == DFmode))
6711 regno = FIRST_SSE_REG;
6714 /* OImode shouldn't be used directly. */
6715 gcc_assert (mode != OImode);
6717 return gen_rtx_REG (orig_mode, regno);
6721 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6726 /* Handle libcalls, which don't provide a type node. */
6727 if (valtype == NULL)
6739 return gen_rtx_REG (mode, FIRST_SSE_REG);
6742 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6746 return gen_rtx_REG (mode, AX_REG);
6750 ret = construct_container (mode, orig_mode, valtype, 1,
6751 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6752 x86_64_int_return_registers, 0);
6754 /* For zero sized structures, construct_container returns NULL, but we
6755 need to keep the rest of the compiler happy by returning a meaningful value. */
6757 ret = gen_rtx_REG (orig_mode, AX_REG);
6763 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6765 unsigned int regno = AX_REG;
6769 switch (GET_MODE_SIZE (mode))
6772 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6773 && !COMPLEX_MODE_P (mode))
6774 regno = FIRST_SSE_REG;
6778 if (mode == SFmode || mode == DFmode)
6779 regno = FIRST_SSE_REG;
6785 return gen_rtx_REG (orig_mode, regno);
6789 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6790 enum machine_mode orig_mode, enum machine_mode mode)
6792 const_tree fn, fntype;
6795 if (fntype_or_decl && DECL_P (fntype_or_decl))
6796 fn = fntype_or_decl;
6797 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6799 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6800 return function_value_ms_64 (orig_mode, mode);
6801 else if (TARGET_64BIT)
6802 return function_value_64 (orig_mode, mode, valtype);
6804 return function_value_32 (orig_mode, mode, fntype, fn);
6808 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6809 bool outgoing ATTRIBUTE_UNUSED)
6811 enum machine_mode mode, orig_mode;
6813 orig_mode = TYPE_MODE (valtype);
6814 mode = type_natural_mode (valtype, NULL);
6815 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6819 ix86_libcall_value (enum machine_mode mode)
6821 return ix86_function_value_1 (NULL, NULL, mode, mode);
6824 /* Return true iff type is returned in memory. */
6826 static bool ATTRIBUTE_UNUSED
6827 return_in_memory_32 (const_tree type, enum machine_mode mode)
6831 if (mode == BLKmode)
6834 size = int_size_in_bytes (type);
6836 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6839 if (VECTOR_MODE_P (mode) || mode == TImode)
6841 /* User-created vectors small enough to fit in EAX. */
6845 /* MMX/3dNow values are returned in MM0,
6846 except when it doesn't exist. */
6850 /* SSE values are returned in XMM0, except when it doesn't exist. */
6854 /* AVX values are returned in YMM0, except when it doesn't exist. */
6865 /* OImode shouldn't be used directly. */
6866 gcc_assert (mode != OImode);
6871 static bool ATTRIBUTE_UNUSED
6872 return_in_memory_64 (const_tree type, enum machine_mode mode)
6874 int needed_intregs, needed_sseregs;
6875 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6878 static bool ATTRIBUTE_UNUSED
6879 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6881 HOST_WIDE_INT size = int_size_in_bytes (type);
6883 /* __m128 is returned in xmm0. */
6884 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6885 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6888 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6889 return size != 1 && size != 2 && size != 4 && size != 8;
6893 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6895 #ifdef SUBTARGET_RETURN_IN_MEMORY
6896 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6898 const enum machine_mode mode = type_natural_mode (type, NULL);
6902 if (ix86_function_type_abi (fntype) == MS_ABI)
6903 return return_in_memory_ms_64 (type, mode);
6905 return return_in_memory_64 (type, mode);
6908 return return_in_memory_32 (type, mode);
6912 /* Return false iff TYPE is returned in memory. This version is used
6913 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6914 but differs notably in that when MMX is available, 8-byte vectors
6915 are returned in memory, rather than in MMX registers. */
6918 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6921 enum machine_mode mode = type_natural_mode (type, NULL);
6924 return return_in_memory_64 (type, mode);
6926 if (mode == BLKmode)
6929 size = int_size_in_bytes (type);
6931 if (VECTOR_MODE_P (mode))
6933 /* Return in memory only if MMX registers *are* available. This
6934 seems backwards, but it is consistent with the existing
6941 else if (mode == TImode)
6943 else if (mode == XFmode)
6949 /* When returning SSE vector types, we have a choice of either
6950 (1) being abi incompatible with a -march switch, or
6951 (2) generating an error.
6952 Given no good solution, I think the safest thing is one warning.
6953 The user won't be able to use -Werror, but....
6955 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6956 called in response to actually generating a caller or callee that
6957 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6958 via aggregate_value_p for general type probing from tree-ssa. */
6961 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6963 static bool warnedsse, warnedmmx;
6965 if (!TARGET_64BIT && type)
6967 /* Look at the return type of the function, not the function type. */
6968 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6970 if (!TARGET_SSE && !warnedsse)
6973 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6976 warning (0, "SSE vector return without SSE enabled "
6981 if (!TARGET_MMX && !warnedmmx)
6983 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6986 warning (0, "MMX vector return without MMX enabled "
6996 /* Create the va_list data type. */
6998 /* Returns the calling convention specific va_list data type.
6999 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7002 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7004 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7006 /* For i386 we use a plain pointer to the argument area. */
7007 if (!TARGET_64BIT || abi == MS_ABI)
7008 return build_pointer_type (char_type_node);
7010 record = lang_hooks.types.make_type (RECORD_TYPE);
7011 type_decl = build_decl (BUILTINS_LOCATION,
7012 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7014 f_gpr = build_decl (BUILTINS_LOCATION,
7015 FIELD_DECL, get_identifier ("gp_offset"),
7016 unsigned_type_node);
7017 f_fpr = build_decl (BUILTINS_LOCATION,
7018 FIELD_DECL, get_identifier ("fp_offset"),
7019 unsigned_type_node);
7020 f_ovf = build_decl (BUILTINS_LOCATION,
7021 FIELD_DECL, get_identifier ("overflow_arg_area"),
7023 f_sav = build_decl (BUILTINS_LOCATION,
7024 FIELD_DECL, get_identifier ("reg_save_area"),
7027 va_list_gpr_counter_field = f_gpr;
7028 va_list_fpr_counter_field = f_fpr;
7030 DECL_FIELD_CONTEXT (f_gpr) = record;
7031 DECL_FIELD_CONTEXT (f_fpr) = record;
7032 DECL_FIELD_CONTEXT (f_ovf) = record;
7033 DECL_FIELD_CONTEXT (f_sav) = record;
7035 TREE_CHAIN (record) = type_decl;
7036 TYPE_NAME (record) = type_decl;
7037 TYPE_FIELDS (record) = f_gpr;
7038 DECL_CHAIN (f_gpr) = f_fpr;
7039 DECL_CHAIN (f_fpr) = f_ovf;
7040 DECL_CHAIN (f_ovf) = f_sav;
7042 layout_type (record);
7044 /* The correct type is an array type of one element. */
7045 return build_array_type (record, build_index_type (size_zero_node));
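/* For reference, the record built above matches the layout mandated by
   the SysV x86-64 psABI; in user terms (an illustrative sketch only):

     typedef struct __va_list_tag {
       unsigned int gp_offset;     /- byte offset into reg_save_area for GPRs -/
       unsigned int fp_offset;     /- byte offset into reg_save_area for SSE regs -/
       void *overflow_arg_area;    /- next stack-passed argument -/
       void *reg_save_area;        /- area filled by the varargs prologue -/
     } va_list[1];  */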
7048 /* Set up the builtin va_list data type and for 64-bit the additional
7049 calling convention specific va_list data types. */
7052 ix86_build_builtin_va_list (void)
7054 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7056 /* Initialize abi specific va_list builtin types. */
7060 if (ix86_abi == MS_ABI)
7062 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7063 if (TREE_CODE (t) != RECORD_TYPE)
7064 t = build_variant_type_copy (t);
7065 sysv_va_list_type_node = t;
7070 if (TREE_CODE (t) != RECORD_TYPE)
7071 t = build_variant_type_copy (t);
7072 sysv_va_list_type_node = t;
7074 if (ix86_abi != MS_ABI)
7076 t = ix86_build_builtin_va_list_abi (MS_ABI);
7077 if (TREE_CODE (t) != RECORD_TYPE)
7078 t = build_variant_type_copy (t);
7079 ms_va_list_type_node = t;
7084 if (TREE_CODE (t) != RECORD_TYPE)
7085 t = build_variant_type_copy (t);
7086 ms_va_list_type_node = t;
7093 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7096 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7102 /* GPR size of varargs save area. */
7103 if (cfun->va_list_gpr_size)
7104 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7106 ix86_varargs_gpr_size = 0;
7108 /* FPR size of varargs save area. We don't need it if we don't pass
7109 anything in SSE registers. */
7110 if (TARGET_SSE && cfun->va_list_fpr_size)
7111 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7113 ix86_varargs_fpr_size = 0;
7115 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7118 save_area = frame_pointer_rtx;
7119 set = get_varargs_alias_set ();
7121 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7122 if (max > X86_64_REGPARM_MAX)
7123 max = X86_64_REGPARM_MAX;
7125 for (i = cum->regno; i < max; i++)
7127 mem = gen_rtx_MEM (Pmode,
7128 plus_constant (save_area, i * UNITS_PER_WORD));
7129 MEM_NOTRAP_P (mem) = 1;
7130 set_mem_alias_set (mem, set);
7131 emit_move_insn (mem, gen_rtx_REG (Pmode,
7132 x86_64_int_parameter_registers[i]));
7135 if (ix86_varargs_fpr_size)
7137 enum machine_mode smode;
7140 /* Now emit code to save SSE registers. The AX parameter contains the number
7141 of SSE parameter registers used to call this function, though all we
7142 actually check here is the zero/non-zero status. */
7144 label = gen_label_rtx ();
7145 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7146 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7149 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7150 we used movdqa (i.e. TImode) instead? Perhaps even better would
7151 be if we could determine the real mode of the data, via a hook
7152 into pass_stdarg. Ignore all that for now. */
7154 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7155 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7157 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7158 if (max > X86_64_SSE_REGPARM_MAX)
7159 max = X86_64_SSE_REGPARM_MAX;
7161 for (i = cum->sse_regno; i < max; ++i)
7163 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7164 mem = gen_rtx_MEM (smode, mem);
7165 MEM_NOTRAP_P (mem) = 1;
7166 set_mem_alias_set (mem, set);
7167 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7169 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
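/* Layout of the save area emitted above (illustrative): the six GPR
   argument registers occupy bytes 0..47 (8 bytes each), followed by up
   to eight 16-byte SSE slots at 48, 64, ..., so a full save area is
   48 + 8*16 = 176 bytes; the va_list fields gp_offset and fp_offset
   index into this block.  */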
7177 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7179 alias_set_type set = get_varargs_alias_set ();
7182 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7186 mem = gen_rtx_MEM (Pmode,
7187 plus_constant (virtual_incoming_args_rtx,
7188 i * UNITS_PER_WORD));
7189 MEM_NOTRAP_P (mem) = 1;
7190 set_mem_alias_set (mem, set);
7192 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7193 emit_move_insn (mem, reg);
7198 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7199 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7202 CUMULATIVE_ARGS next_cum;
7205 /* This argument doesn't appear to be used anymore. Which is good,
7206 because the old code here didn't suppress rtl generation. */
7207 gcc_assert (!no_rtl);
7212 fntype = TREE_TYPE (current_function_decl);
7214 /* For varargs, we do not want to skip the dummy va_dcl argument.
7215 For stdargs, we do want to skip the last named argument. */
7217 if (stdarg_p (fntype))
7218 ix86_function_arg_advance (&next_cum, mode, type, true);
7220 if (cum->call_abi == MS_ABI)
7221 setup_incoming_varargs_ms_64 (&next_cum);
7223 setup_incoming_varargs_64 (&next_cum);
7226 /* Checks whether TYPE is a va_list implemented as a plain char pointer. */
7229 is_va_list_char_pointer (tree type)
7233 /* For 32-bit it is always true. */
7236 canonic = ix86_canonical_va_list_type (type);
7237 return (canonic == ms_va_list_type_node
7238 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7241 /* Implement va_start. */
7244 ix86_va_start (tree valist, rtx nextarg)
7246 HOST_WIDE_INT words, n_gpr, n_fpr;
7247 tree f_gpr, f_fpr, f_ovf, f_sav;
7248 tree gpr, fpr, ovf, sav, t;
7251 /* Only 64bit target needs something special. */
7252 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7254 std_expand_builtin_va_start (valist, nextarg);
7258 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7259 f_fpr = DECL_CHAIN (f_gpr);
7260 f_ovf = DECL_CHAIN (f_fpr);
7261 f_sav = DECL_CHAIN (f_ovf);
7263 valist = build_simple_mem_ref (valist);
7264 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7265 /* The following should be folded into the MEM_REF offset. */
7266 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7268 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7270 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7272 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7275 /* Count number of gp and fp argument registers used. */
7276 words = crtl->args.info.words;
7277 n_gpr = crtl->args.info.regno;
7278 n_fpr = crtl->args.info.sse_regno;
7280 if (cfun->va_list_gpr_size)
7282 type = TREE_TYPE (gpr);
7283 t = build2 (MODIFY_EXPR, type,
7284 gpr, build_int_cst (type, n_gpr * 8));
7285 TREE_SIDE_EFFECTS (t) = 1;
7286 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7289 if (TARGET_SSE && cfun->va_list_fpr_size)
7291 type = TREE_TYPE (fpr);
7292 t = build2 (MODIFY_EXPR, type, fpr,
7293 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7294 TREE_SIDE_EFFECTS (t) = 1;
7295 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7298 /* Find the overflow area. */
7299 type = TREE_TYPE (ovf);
7300 t = make_tree (type, crtl->args.internal_arg_pointer);
7302 t = build2 (POINTER_PLUS_EXPR, type, t,
7303 size_int (words * UNITS_PER_WORD));
7304 t = build2 (MODIFY_EXPR, type, ovf, t);
7305 TREE_SIDE_EFFECTS (t) = 1;
7306 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7308 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7310 /* Find the register save area.
7311 The function prologue saves it right above the stack frame. */
7312 type = TREE_TYPE (sav);
7313 t = make_tree (type, frame_pointer_rtx);
7314 if (!ix86_varargs_gpr_size)
7315 t = build2 (POINTER_PLUS_EXPR, type, t,
7316 size_int (-8 * X86_64_REGPARM_MAX));
7317 t = build2 (MODIFY_EXPR, type, sav, t);
7318 TREE_SIDE_EFFECTS (t) = 1;
7319 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7323 /* Implement va_arg. */
7326 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7329 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7330 tree f_gpr, f_fpr, f_ovf, f_sav;
7331 tree gpr, fpr, ovf, sav, t;
7333 tree lab_false, lab_over = NULL_TREE;
7338 enum machine_mode nat_mode;
7339 unsigned int arg_boundary;
7341 /* Only 64bit target needs something special. */
7342 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7343 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7345 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7346 f_fpr = DECL_CHAIN (f_gpr);
7347 f_ovf = DECL_CHAIN (f_fpr);
7348 f_sav = DECL_CHAIN (f_ovf);
7350 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7351 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7352 valist = build_va_arg_indirect_ref (valist);
7353 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7354 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7355 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7357 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7359 type = build_pointer_type (type);
7360 size = int_size_in_bytes (type);
7361 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7363 nat_mode = type_natural_mode (type, NULL);
7372 /* Unnamed 256bit vector mode parameters are passed on stack. */
7373 if (ix86_cfun_abi () == SYSV_ABI)
7380 container = construct_container (nat_mode, TYPE_MODE (type),
7381 type, 0, X86_64_REGPARM_MAX,
7382 X86_64_SSE_REGPARM_MAX, intreg,
7387 /* Pull the value out of the saved registers. */
7389 addr = create_tmp_var (ptr_type_node, "addr");
7393 int needed_intregs, needed_sseregs;
7395 tree int_addr, sse_addr;
7397 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7398 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7400 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7402 need_temp = (!REG_P (container)
7403 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7404 || TYPE_ALIGN (type) > 128));
7406 /* In case we are passing a structure, verify that it is a consecutive block
7407 in the register save area. If not, we need to do moves. */
7408 if (!need_temp && !REG_P (container))
7410 /* Verify that all registers are strictly consecutive. */
7411 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7415 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7417 rtx slot = XVECEXP (container, 0, i);
7418 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7419 || INTVAL (XEXP (slot, 1)) != i * 16)
7427 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7429 rtx slot = XVECEXP (container, 0, i);
7430 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7431 || INTVAL (XEXP (slot, 1)) != i * 8)
7443 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7444 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7447 /* First ensure that we fit completely in registers. */
7450 t = build_int_cst (TREE_TYPE (gpr),
7451 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7452 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7453 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7454 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7455 gimplify_and_add (t, pre_p);
7459 t = build_int_cst (TREE_TYPE (fpr),
7460 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7461 + X86_64_REGPARM_MAX * 8);
7462 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7463 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7464 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7465 gimplify_and_add (t, pre_p);
7468 /* Compute index to start of area used for integer regs. */
7471 /* int_addr = gpr + sav; */
7472 t = fold_convert (sizetype, gpr);
7473 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7474 gimplify_assign (int_addr, t, pre_p);
7478 /* sse_addr = fpr + sav; */
7479 t = fold_convert (sizetype, fpr);
7480 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7481 gimplify_assign (sse_addr, t, pre_p);
7485 int i, prev_size = 0;
7486 tree temp = create_tmp_var (type, "va_arg_tmp");
7489 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7490 gimplify_assign (addr, t, pre_p);
7492 for (i = 0; i < XVECLEN (container, 0); i++)
7494 rtx slot = XVECEXP (container, 0, i);
7495 rtx reg = XEXP (slot, 0);
7496 enum machine_mode mode = GET_MODE (reg);
7502 tree dest_addr, dest;
7503 int cur_size = GET_MODE_SIZE (mode);
7505 if (prev_size + cur_size > size)
7507 cur_size = size - prev_size;
7508 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7509 if (mode == BLKmode)
7512 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7513 if (mode == GET_MODE (reg))
7514 addr_type = build_pointer_type (piece_type);
7516 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7518 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7521 if (SSE_REGNO_P (REGNO (reg)))
7523 src_addr = sse_addr;
7524 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7528 src_addr = int_addr;
7529 src_offset = REGNO (reg) * 8;
7531 src_addr = fold_convert (addr_type, src_addr);
7532 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7533 size_int (src_offset));
7535 dest_addr = fold_convert (daddr_type, addr);
7536 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7537 size_int (INTVAL (XEXP (slot, 1))));
7538 if (cur_size == GET_MODE_SIZE (mode))
7540 src = build_va_arg_indirect_ref (src_addr);
7541 dest = build_va_arg_indirect_ref (dest_addr);
7543 gimplify_assign (dest, src, pre_p);
7548 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7549 3, dest_addr, src_addr,
7550 size_int (cur_size));
7551 gimplify_and_add (copy, pre_p);
7553 prev_size += cur_size;
7559 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7560 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7561 gimplify_assign (gpr, t, pre_p);
7566 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7567 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7568 gimplify_assign (fpr, t, pre_p);
7571 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7573 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7576 /* ... otherwise out of the overflow area. */
7578 /* When we align a parameter on the stack for the caller, if its
7579 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7580 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
7581 here with the caller. */
7582 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7583 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7584 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7586 /* Care for on-stack alignment if needed. */
7587 if (arg_boundary <= 64 || size == 0)
7591 HOST_WIDE_INT align = arg_boundary / 8;
7592 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7593 size_int (align - 1));
7594 t = fold_convert (sizetype, t);
7595 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7597 t = fold_convert (TREE_TYPE (ovf), t);
7600 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7601 gimplify_assign (addr, t, pre_p);
7603 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7604 size_int (rsize * UNITS_PER_WORD));
7605 gimplify_assign (unshare_expr (ovf), t, pre_p);
7608 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7610 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7611 addr = fold_convert (ptrtype, addr);
7614 addr = build_va_arg_indirect_ref (addr);
7615 return build_va_arg_indirect_ref (addr);
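/* A rough user-level equivalent of the GIMPLE emitted above for a
   single integer va_arg (a sketch; field names follow the va_list
   record, constants follow the save-area layout):

     if (ap->gp_offset < 6 * 8)       /- a GPR slot is still free -/
       {
         addr = (char *) ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else                             /- ... otherwise the overflow area -/
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
       }
     value = *(int *) addr;  */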
7618 /* Return true if OPNUM's MEM should be matched
7619 in movabs* patterns. */
7622 ix86_check_movabs (rtx insn, int opnum)
7626 set = PATTERN (insn);
7627 if (GET_CODE (set) == PARALLEL)
7628 set = XVECEXP (set, 0, 0);
7629 gcc_assert (GET_CODE (set) == SET);
7630 mem = XEXP (set, opnum);
7631 while (GET_CODE (mem) == SUBREG)
7632 mem = SUBREG_REG (mem);
7633 gcc_assert (MEM_P (mem));
7634 return volatile_ok || !MEM_VOLATILE_P (mem);
7637 /* Initialize the table of extra 80387 mathematical constants. */
7640 init_ext_80387_constants (void)
7642 static const char * cst[5] =
7644 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7645 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7646 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7647 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7648 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7652 for (i = 0; i < 5; i++)
7654 real_from_string (&ext_80387_constants_table[i], cst[i]);
7655 /* Ensure each constant is rounded to XFmode precision. */
7656 real_convert (&ext_80387_constants_table[i],
7657 XFmode, &ext_80387_constants_table[i]);
7660 ext_80387_constants_init = 1;
7663 /* Return non-zero if the constant is something that
7664 can be loaded with a special instruction. */
7667 standard_80387_constant_p (rtx x)
7669 enum machine_mode mode = GET_MODE (x);
7673 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7676 if (x == CONST0_RTX (mode))
7678 if (x == CONST1_RTX (mode))
7681 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7683 /* For XFmode constants, try to find a special 80387 instruction when
7684 optimizing for size or on those CPUs that benefit from them. */
7686 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7690 if (! ext_80387_constants_init)
7691 init_ext_80387_constants ();
7693 for (i = 0; i < 5; i++)
7694 if (real_identical (&r, &ext_80387_constants_table[i]))
7698 /* Load of the constant -0.0 or -1.0 will be split as
7699 fldz;fchs or fld1;fchs sequence. */
7700 if (real_isnegzero (&r))
7702 if (real_identical (&r, &dconstm1))
7708 /* Return the opcode of the special instruction to be used to load
7712 standard_80387_constant_opcode (rtx x)
7714 switch (standard_80387_constant_p (x))
7738 /* Return the CONST_DOUBLE representing the 80387 constant that is
7739 loaded by the specified special instruction. The argument IDX
7740 matches the return value from standard_80387_constant_p. */
7743 standard_80387_constant_rtx (int idx)
7747 if (! ext_80387_constants_init)
7748 init_ext_80387_constants ();
7764 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
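/* Summary of the encoding shared by standard_80387_constant_p and the
   opcode/rtx helpers above (as read from this file; indices 3..7
   follow the cst[] table order):
     1 -> fldz (0.0)    2 -> fld1 (1.0)
     3 -> fldlg2        4 -> fldln2       5 -> fldl2e
     6 -> fldl2t        7 -> fldpi
     8 (-0.0) and 9 (-1.0) load as fldz;fchs and fld1;fchs.  */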
7768 /* Return 1 if X is all 0s and 2 if X is all 1s
7769 in a supported SSE vector mode. */
7772 standard_sse_constant_p (rtx x)
7774 enum machine_mode mode = GET_MODE (x);
7776 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7778 if (vector_all_ones_operand (x, mode))
7794 /* Return the opcode of the special instruction to be used to load
7798 standard_sse_constant_opcode (rtx insn, rtx x)
7800 switch (standard_sse_constant_p (x))
7803 switch (get_attr_mode (insn))
7806 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7808 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7809 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7811 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7813 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7814 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7816 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7818 return "vxorps\t%x0, %x0, %x0";
7820 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7821 return "vxorps\t%x0, %x0, %x0";
7823 return "vxorpd\t%x0, %x0, %x0";
7825 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7826 return "vxorps\t%x0, %x0, %x0";
7828 return "vpxor\t%x0, %x0, %x0";
7833 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7840 /* Returns true if OP contains a symbol reference. */
7843 symbolic_reference_mentioned_p (rtx op)
7848 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7851 fmt = GET_RTX_FORMAT (GET_CODE (op));
7852 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7858 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7859 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7863 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7870 /* Return true if it is appropriate to emit `ret' instructions in the
7871 body of a function. Do this only if the epilogue is simple, needing a
7872 couple of insns. Prior to reloading, we can't tell how many registers
7873 must be saved, so return false then. Return false if there is no frame
7874 marker to de-allocate. */
7877 ix86_can_use_return_insn_p (void)
7879 struct ix86_frame frame;
7881 if (! reload_completed || frame_pointer_needed)
7884 /* Don't allow more than 32k pop, since that's all we can do
7885 with one instruction. */
7886 if (crtl->args.pops_args && crtl->args.size >= 32768)
7889 ix86_compute_frame_layout (&frame);
7890 return (frame.stack_pointer_offset == UNITS_PER_WORD
7891 && (frame.nregs + frame.nsseregs) == 0);
7894 /* Value should be nonzero if functions must have frame pointers.
7895 Zero means the frame pointer need not be set up (and parms may
7896 be accessed via the stack pointer) in functions that seem suitable. */
7899 ix86_frame_pointer_required (void)
7901 /* If we accessed previous frames, then the generated code expects
7902 to be able to access the saved ebp value in our frame. */
7903 if (cfun->machine->accesses_prev_frame)
7906 /* Several x86 OSes need a frame pointer for other reasons,
7907 usually pertaining to setjmp. */
7908 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7911 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7912 turns off the frame pointer by default. Turn it back on now if
7913 we've not got a leaf function. */
7914 if (TARGET_OMIT_LEAF_FRAME_POINTER
7915 && (!current_function_is_leaf
7916 || ix86_current_function_calls_tls_descriptor))
7919 if (crtl->profile && !flag_fentry)
7925 /* Record that the current function accesses previous call frames. */
7928 ix86_setup_frame_addresses (void)
7930 cfun->machine->accesses_prev_frame = 1;
7933 #ifndef USE_HIDDEN_LINKONCE
7934 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7935 # define USE_HIDDEN_LINKONCE 1
7937 # define USE_HIDDEN_LINKONCE 0
7941 static int pic_labels_used;
7943 /* Fills in the label name that should be used for a pc thunk for
7944 the given register. */
7947 get_pc_thunk_name (char name[32], unsigned int regno)
7949 gcc_assert (!TARGET_64BIT);
7951 if (USE_HIDDEN_LINKONCE)
7952 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7954 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7958 /* This function generates code for -fpic that loads %ebx with
7959 the return address of the caller and then returns. */
7962 ix86_code_end (void)
7967 for (regno = 0; regno < 8; ++regno)
7972 if (! ((pic_labels_used >> regno) & 1))
7975 get_pc_thunk_name (name, regno);
7977 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7978 get_identifier (name),
7979 build_function_type (void_type_node, void_list_node));
7980 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7981 NULL_TREE, void_type_node);
7982 TREE_PUBLIC (decl) = 1;
7983 TREE_STATIC (decl) = 1;
7988 switch_to_section (darwin_sections[text_coal_section]);
7989 fputs ("\t.weak_definition\t", asm_out_file);
7990 assemble_name (asm_out_file, name);
7991 fputs ("\n\t.private_extern\t", asm_out_file);
7992 assemble_name (asm_out_file, name);
7993 putc ('\n', asm_out_file);
7994 ASM_OUTPUT_LABEL (asm_out_file, name);
7995 DECL_WEAK (decl) = 1;
7999 if (USE_HIDDEN_LINKONCE)
8001 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8003 targetm.asm_out.unique_section (decl, 0);
8004 switch_to_section (get_named_section (decl, NULL, 0));
8006 targetm.asm_out.globalize_label (asm_out_file, name);
8007 fputs ("\t.hidden\t", asm_out_file);
8008 assemble_name (asm_out_file, name);
8009 putc ('\n', asm_out_file);
8010 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8014 switch_to_section (text_section);
8015 ASM_OUTPUT_LABEL (asm_out_file, name);
8018 DECL_INITIAL (decl) = make_node (BLOCK);
8019 current_function_decl = decl;
8020 init_function_start (decl);
8021 first_function_block_is_cold = false;
8022 /* Make sure unwind info is emitted for the thunk if needed. */
8023 final_start_function (emit_barrier (), asm_out_file, 1);
8025 xops[0] = gen_rtx_REG (Pmode, regno);
8026 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8027 /* Pad stack IP move with 4 instructions. 2 NOPs count as 1 instruction. */
8029 if (TARGET_PAD_SHORT_FUNCTION)
8030 output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop",
8032 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8033 output_asm_insn ("ret", xops);
8034 final_end_function ();
8035 init_insn_lengths ();
8036 free_after_compilation (cfun);
8038 current_function_decl = NULL;
8042 /* Emit code for the SET_GOT patterns. */
8045 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8051 if (TARGET_VXWORKS_RTP && flag_pic)
8053 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8054 xops[2] = gen_rtx_MEM (Pmode,
8055 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8056 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8058 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8059 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8060 an unadorned address. */
8061 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8062 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8063 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8067 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8069 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8071 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8074 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8077 output_asm_insn ("call\t%a2", xops);
8078 #ifdef DWARF2_UNWIND_INFO
8079 /* The call to next label acts as a push. */
8080 if (dwarf2out_do_frame ())
8084 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8085 gen_rtx_PLUS (Pmode,
8088 RTX_FRAME_RELATED_P (insn) = 1;
8089 dwarf2out_frame_debug (insn, true);
8096 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8097 is what will be referenced by the Mach-O PIC subsystem. */
8099 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8102 targetm.asm_out.internal_label (asm_out_file, "L",
8103 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8107 output_asm_insn ("pop%z0\t%0", xops);
8108 #ifdef DWARF2_UNWIND_INFO
8109 /* The pop is a pop and clobbers dest, but doesn't restore it
8110 for unwind info purposes. */
8111 if (dwarf2out_do_frame ())
8115 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8116 dwarf2out_frame_debug (insn, true);
insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                               gen_rtx_PLUS (Pmode,
                                             stack_pointer_rtx,
                                             GEN_INT (4))));
8121 RTX_FRAME_RELATED_P (insn) = 1;
8122 dwarf2out_frame_debug (insn, true);
8131 get_pc_thunk_name (name, REGNO (dest));
8132 pic_labels_used |= 1 << REGNO (dest);
8134 #ifdef DWARF2_UNWIND_INFO
/* Ensure all queued register saves are flushed before the
   call. */
8137 if (dwarf2out_do_frame ())
8138 dwarf2out_flush_queued_reg_saves ();
8140 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8141 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8142 output_asm_insn ("call\t%X2", xops);
8143 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8144 is what will be referenced by the Mach-O PIC subsystem. */
8147 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8149 targetm.asm_out.internal_label (asm_out_file, "L",
8150 CODE_LABEL_NUMBER (label));
8157 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8158 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8160 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
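/* A sketch of the two PIC shapes produced above, assuming %ebx as DEST
   and AT&T syntax:

     without deep branch prediction:
	call	.L2
     .L2:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

     with deep branch prediction, via the pc thunk:
	call	get_pc_thunk.bx		(illustrative thunk name)
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */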
/* Generate a "push" pattern for input ARG. */
8170 struct machine_function *m = cfun->machine;
8172 if (m->fs.cfa_reg == stack_pointer_rtx)
8173 m->fs.cfa_offset += UNITS_PER_WORD;
8174 m->fs.sp_offset += UNITS_PER_WORD;
return gen_rtx_SET (VOIDmode,
                    gen_rtx_MEM (Pmode,
                                 gen_rtx_PRE_DEC (Pmode,
                                                  stack_pointer_rtx)),
                    arg);
/* Generate a "pop" pattern for input ARG. */
return gen_rtx_SET (VOIDmode,
                    arg,
                    gen_rtx_MEM (Pmode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx)));
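/* E.g. gen_push (reg) yields (set (mem (pre_dec sp)) (reg)) and
   gen_pop (reg) yields (set (reg) (mem (post_inc sp))), the RTL
   shapes of plain push and pop instructions.  */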
8195 /* Return >= 0 if there is an unused call-clobbered register available
8196 for the entire function. */
8199 ix86_select_alt_pic_regnum (void)
8201 if (current_function_is_leaf
8203 && !ix86_current_function_calls_tls_descriptor)
8206 /* Can't use the same register for both PIC and DRAP. */
8208 drap = REGNO (crtl->drap_reg);
8211 for (i = 2; i >= 0; --i)
8212 if (i != drap && !df_regs_ever_live_p (i))
8216 return INVALID_REGNUM;
8219 /* Return 1 if we need to save REGNO. */
8221 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8223 if (pic_offset_table_rtx
8224 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8225 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8227 || crtl->calls_eh_return
8228 || crtl->uses_const_pool))
8230 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8235 if (crtl->calls_eh_return && maybe_eh_return)
8240 unsigned test = EH_RETURN_DATA_REGNO (i);
8241 if (test == INVALID_REGNUM)
8248 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8251 return (df_regs_ever_live_p (regno)
8252 && !call_used_regs[regno]
8253 && !fixed_regs[regno]
8254 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* Return the number of saved general purpose registers. */
8260 ix86_nsaved_regs (void)
8265 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8266 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
/* Return the number of saved SSE registers. */
8274 ix86_nsaved_sseregs (void)
8279 if (ix86_cfun_abi () != MS_ABI)
8281 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8282 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8287 /* Given FROM and TO register numbers, say whether this elimination is
8288 allowed. If stack alignment is needed, we can only replace argument
8289 pointer with hard frame pointer, or replace frame pointer with stack
8290 pointer. Otherwise, frame pointer elimination is automatically
8291 handled and all other eliminations are valid. */
8294 ix86_can_eliminate (const int from, const int to)
8296 if (stack_realign_fp)
8297 return ((from == ARG_POINTER_REGNUM
8298 && to == HARD_FRAME_POINTER_REGNUM)
8299 || (from == FRAME_POINTER_REGNUM
8300 && to == STACK_POINTER_REGNUM));
8302 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8305 /* Return the offset between two registers, one to be eliminated, and the other
8306 its replacement, at the start of a routine. */
8309 ix86_initial_elimination_offset (int from, int to)
8311 struct ix86_frame frame;
8312 ix86_compute_frame_layout (&frame);
8314 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8315 return frame.hard_frame_pointer_offset;
8316 else if (from == FRAME_POINTER_REGNUM
8317 && to == HARD_FRAME_POINTER_REGNUM)
8318 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8321 gcc_assert (to == STACK_POINTER_REGNUM);
8323 if (from == ARG_POINTER_REGNUM)
8324 return frame.stack_pointer_offset;
8326 gcc_assert (from == FRAME_POINTER_REGNUM);
8327 return frame.stack_pointer_offset - frame.frame_pointer_offset;
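/* A worked example with hypothetical offsets: if the frame layout gives
   hard_frame_pointer_offset = 8, frame_pointer_offset = 16 and
   stack_pointer_offset = 48, then eliminating ARG_POINTER to the hard
   frame pointer yields 8, FRAME_POINTER to the hard frame pointer
   yields 8 - 16 = -8, ARG_POINTER to the stack pointer yields 48, and
   FRAME_POINTER to the stack pointer yields 48 - 16 = 32.  */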
8331 /* In a dynamically-aligned function, we can't know the offset from
8332 stack pointer to frame pointer, so we must ensure that setjmp
8333 eliminates fp against the hard fp (%ebp) rather than trying to
8334 index from %esp up to the top of the frame across a gap that is
8335 of unknown (at compile-time) size. */
8337 ix86_builtin_setjmp_frame_value (void)
8339 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8342 /* Fill structure ix86_frame about frame of currently computed function. */
8345 ix86_compute_frame_layout (struct ix86_frame *frame)
8347 unsigned int stack_alignment_needed;
8348 HOST_WIDE_INT offset;
8349 unsigned int preferred_alignment;
8350 HOST_WIDE_INT size = get_frame_size ();
8351 HOST_WIDE_INT to_allocate;
8353 frame->nregs = ix86_nsaved_regs ();
8354 frame->nsseregs = ix86_nsaved_sseregs ();
8356 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8357 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
/* The MS ABI seems to require stack alignment to always be 16, except
   for function prologues and leaf functions. */
8361 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8362 && (!current_function_is_leaf || cfun->calls_alloca != 0
8363 || ix86_current_function_calls_tls_descriptor))
8365 preferred_alignment = 16;
8366 stack_alignment_needed = 16;
8367 crtl->preferred_stack_boundary = 128;
8368 crtl->stack_alignment_needed = 128;
8371 gcc_assert (!size || stack_alignment_needed);
8372 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8373 gcc_assert (preferred_alignment <= stack_alignment_needed);
/* During the reload iteration, the number of registers saved can
   change. Recompute the value as needed. Do not recompute when the
   number of registers didn't change, as reload does multiple calls
   to the function and does not expect the decision to change within
   a single iteration. */
8379 if (!optimize_function_for_size_p (cfun)
8380 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8382 int count = frame->nregs;
8383 struct cgraph_node *node = cgraph_node (current_function_decl);
8385 cfun->machine->use_fast_prologue_epilogue_nregs = count;
/* The fast prologue uses move instead of push to save registers. This
   is significantly longer, but also executes faster, as modern hardware
   can execute the moves in parallel but can't do that for push/pop.

   Be careful about choosing what prologue to emit: when the function
   takes many instructions to execute, we may use the slow version as
   well as when the function is known to be outside a hot spot (this
   is known with feedback only). Weight the size of the function by
   the number of registers to save, as it is cheap to use one or two
   push instructions but very slow to use many of them. */
8397 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8398 if (node->frequency < NODE_FREQUENCY_NORMAL
8399 || (flag_branch_probabilities
8400 && node->frequency < NODE_FREQUENCY_HOT))
8401 cfun->machine->use_fast_prologue_epilogue = false;
8403 cfun->machine->use_fast_prologue_epilogue
8404 = !expensive_function_p (count);
8406 if (TARGET_PROLOGUE_USING_MOVE
8407 && cfun->machine->use_fast_prologue_epilogue)
8408 frame->save_regs_using_mov = true;
8410 frame->save_regs_using_mov = false;
8412 /* If static stack checking is enabled and done with probes, the registers
8413 need to be saved before allocating the frame. */
8414 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8415 frame->save_regs_using_mov = false;
8417 /* Skip return address. */
8418 offset = UNITS_PER_WORD;
8420 /* Skip pushed static chain. */
8421 if (ix86_static_chain_on_stack)
8422 offset += UNITS_PER_WORD;
8424 /* Skip saved base pointer. */
8425 if (frame_pointer_needed)
8426 offset += UNITS_PER_WORD;
8428 frame->hard_frame_pointer_offset = offset;
8430 /* Register save area */
8431 offset += frame->nregs * UNITS_PER_WORD;
8432 frame->reg_save_offset = offset;
8434 /* Align and set SSE register save area. */
8435 if (frame->nsseregs)
8437 /* The only ABI that has saved SSE registers (Win64) also has a
8438 16-byte aligned default stack, and thus we don't need to be
8439 within the re-aligned local stack frame to save them. */
8440 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8441 offset = (offset + 16 - 1) & -16;
8442 offset += frame->nsseregs * 16;
8444 frame->sse_reg_save_offset = offset;
8446 /* The re-aligned stack starts here. Values before this point are not
8447 directly comparable with values below this point. In order to make
8448 sure that no value happens to be the same before and after, force
8449 the alignment computation below to add a non-zero value. */
8450 if (stack_realign_fp)
8451 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8454 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8455 offset += frame->va_arg_size;
8457 /* Align start of frame for local function. */
8458 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8460 /* Frame pointer points here. */
8461 frame->frame_pointer_offset = offset;
8465 /* Add outgoing arguments area. Can be skipped if we eliminated
8466 all the function calls as dead code.
   Skipping is, however, impossible when the function calls alloca.
   The alloca expander assumes that the last crtl->outgoing_args_size
   bytes of the stack frame are unused. */
8470 if (ACCUMULATE_OUTGOING_ARGS
8471 && (!current_function_is_leaf || cfun->calls_alloca
8472 || ix86_current_function_calls_tls_descriptor))
8474 offset += crtl->outgoing_args_size;
8475 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8478 frame->outgoing_arguments_size = 0;
/* Align stack boundary. Only needed if we're calling another function
   or using alloca. */
8482 if (!current_function_is_leaf || cfun->calls_alloca
8483 || ix86_current_function_calls_tls_descriptor)
8484 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
/* We've reached the end of the stack frame. */
8487 frame->stack_pointer_offset = offset;
8489 /* Size prologue needs to allocate. */
8490 to_allocate = offset - frame->sse_reg_save_offset;
8492 if ((!to_allocate && frame->nregs <= 1)
8493 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8494 frame->save_regs_using_mov = false;
8496 if (ix86_using_red_zone ()
8497 && current_function_sp_is_unchanging
8498 && current_function_is_leaf
8499 && !ix86_current_function_calls_tls_descriptor)
8501 frame->red_zone_size = to_allocate;
8502 if (frame->save_regs_using_mov)
8503 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8504 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8505 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8508 frame->red_zone_size = 0;
8509 frame->stack_pointer_offset -= frame->red_zone_size;
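/* To visualize the computation above: the offsets grow from the top of
   the frame roughly as follows (a sketch of the common case, with no
   static chain, DRAP or re-alignment):

	return address			<- offset UNITS_PER_WORD
	saved frame pointer		<- hard_frame_pointer_offset
	integer register save area	<- reg_save_offset
	SSE register save area		<- sse_reg_save_offset (16-aligned)
	va_arg area and locals		<- frame_pointer_offset
	outgoing arguments		<- stack_pointer_offset
					   (less red_zone_size, if any)  */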
8512 /* This is semi-inlined memory_address_length, but simplified
8513 since we know that we're always dealing with reg+offset, and
8514 to avoid having to create and discard all that rtl. */
8517 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8523 /* EBP and R13 cannot be encoded without an offset. */
8524 len = (regno == BP_REG || regno == R13_REG);
8526 else if (IN_RANGE (offset, -128, 127))
8529 /* ESP and R12 must be encoded with a SIB byte. */
if (regno == SP_REG || regno == R12_REG)
  len++;

return len;
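/* Worked examples of the length computed above (hypothetical operands):
   0(%ebp) needs a one-byte displacement even for offset zero, so the
   result is 1; -8(%eax) also encodes in one byte; -512(%eax) needs a
   four-byte displacement; and any %esp- or %r12-based address pays one
   extra byte for the SIB byte.  */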
8536 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8537 The valid base registers are taken from CFUN->MACHINE->FS. */
8540 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8542 const struct machine_function *m = cfun->machine;
8543 rtx base_reg = NULL;
8544 HOST_WIDE_INT base_offset = 0;
8546 if (m->use_fast_prologue_epilogue)
8548 /* Choose the base register most likely to allow the most scheduling
opportunities. Generally FP is valid throughout the function,
8550 while DRAP must be reloaded within the epilogue. But choose either
8551 over the SP due to increased encoding size. */
8555 base_reg = hard_frame_pointer_rtx;
8556 base_offset = m->fs.fp_offset - cfa_offset;
8558 else if (m->fs.drap_valid)
8560 base_reg = crtl->drap_reg;
8561 base_offset = 0 - cfa_offset;
8563 else if (m->fs.sp_valid)
8565 base_reg = stack_pointer_rtx;
8566 base_offset = m->fs.sp_offset - cfa_offset;
8571 HOST_WIDE_INT toffset;
8574 /* Choose the base register with the smallest address encoding.
8575 With a tie, choose FP > DRAP > SP. */
8578 base_reg = stack_pointer_rtx;
8579 base_offset = m->fs.sp_offset - cfa_offset;
8580 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8582 if (m->fs.drap_valid)
8584 toffset = 0 - cfa_offset;
8585 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8588 base_reg = crtl->drap_reg;
8589 base_offset = toffset;
8595 toffset = m->fs.fp_offset - cfa_offset;
8596 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8599 base_reg = hard_frame_pointer_rtx;
8600 base_offset = toffset;
8605 gcc_assert (base_reg != NULL);
8607 return plus_constant (base_reg, base_offset);
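/* E.g. (hypothetical numbers) with CFA_OFFSET 24 and m->fs.fp_offset 16,
   picking the frame pointer gives base_offset 16 - 24 = -8, i.e. the
   address -8(%ebp).  */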
8610 /* Emit code to save registers in the prologue. */
8613 ix86_emit_save_regs (void)
8618 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8619 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8621 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8622 RTX_FRAME_RELATED_P (insn) = 1;
8626 /* Emit a single register save at CFA - CFA_OFFSET. */
8629 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8630 HOST_WIDE_INT cfa_offset)
8632 struct machine_function *m = cfun->machine;
8633 rtx reg = gen_rtx_REG (mode, regno);
8634 rtx mem, addr, base, insn;
8636 addr = choose_baseaddr (cfa_offset);
8637 mem = gen_frame_mem (mode, addr);
8639 /* For SSE saves, we need to indicate the 128-bit alignment. */
8640 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8642 insn = emit_move_insn (mem, reg);
8643 RTX_FRAME_RELATED_P (insn) = 1;
8646 if (GET_CODE (base) == PLUS)
8647 base = XEXP (base, 0);
8648 gcc_checking_assert (REG_P (base));
8650 /* When saving registers into a re-aligned local stack frame, avoid
8651 any tricky guessing by dwarf2out. */
8652 if (m->fs.realigned)
8654 gcc_checking_assert (stack_realign_drap);
8656 if (regno == REGNO (crtl->drap_reg))
8658 /* A bit of a hack. We force the DRAP register to be saved in
8659 the re-aligned stack frame, which provides us with a copy
8660 of the CFA that will last past the prologue. Install it. */
8661 gcc_checking_assert (cfun->machine->fs.fp_valid);
8662 addr = plus_constant (hard_frame_pointer_rtx,
8663 cfun->machine->fs.fp_offset - cfa_offset);
8664 mem = gen_rtx_MEM (mode, addr);
8665 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8669 /* The frame pointer is a stable reference within the
8670 aligned frame. Use it. */
8671 gcc_checking_assert (cfun->machine->fs.fp_valid);
8672 addr = plus_constant (hard_frame_pointer_rtx,
8673 cfun->machine->fs.fp_offset - cfa_offset);
8674 mem = gen_rtx_MEM (mode, addr);
8675 add_reg_note (insn, REG_CFA_EXPRESSION,
8676 gen_rtx_SET (VOIDmode, mem, reg));
8680 /* The memory may not be relative to the current CFA register,
8681 which means that we may need to generate a new pattern for
8682 use by the unwind info. */
8683 else if (base != m->fs.cfa_reg)
8685 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8686 mem = gen_rtx_MEM (mode, addr);
8687 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8691 /* Emit code to save registers using MOV insns.
8692 First register is stored at CFA - CFA_OFFSET. */
8694 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8698 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8699 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8701 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8702 cfa_offset -= UNITS_PER_WORD;
8706 /* Emit code to save SSE registers using MOV insns.
8707 First register is stored at CFA - CFA_OFFSET. */
8709 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8713 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8714 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8716 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8721 static GTY(()) rtx queued_cfa_restores;
8723 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8724 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8725 Don't add the note if the previously saved value will be left untouched
within the stack red zone till return, as unwinders can find the same value
8727 in the register and on the stack. */
8730 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8732 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8737 add_reg_note (insn, REG_CFA_RESTORE, reg);
8738 RTX_FRAME_RELATED_P (insn) = 1;
8742 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8745 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8748 ix86_add_queued_cfa_restore_notes (rtx insn)
8751 if (!queued_cfa_restores)
8753 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8755 XEXP (last, 1) = REG_NOTES (insn);
8756 REG_NOTES (insn) = queued_cfa_restores;
8757 queued_cfa_restores = NULL_RTX;
8758 RTX_FRAME_RELATED_P (insn) = 1;
8761 /* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.
8768 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8769 int style, bool set_cfa)
8771 struct machine_function *m = cfun->machine;
8775 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8776 else if (x86_64_immediate_operand (offset, DImode))
8777 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8781 /* r11 is used by indirect sibcall return as well, set before the
8782 epilogue and used after the epilogue. */
8784 tmp = gen_rtx_REG (DImode, R11_REG);
8787 gcc_assert (src != hard_frame_pointer_rtx
8788 && dest != hard_frame_pointer_rtx);
8789 tmp = hard_frame_pointer_rtx;
8791 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8793 RTX_FRAME_RELATED_P (insn) = 1;
8794 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8799 ix86_add_queued_cfa_restore_notes (insn);
8805 gcc_assert (m->fs.cfa_reg == src);
8806 m->fs.cfa_offset += INTVAL (offset);
8807 m->fs.cfa_reg = dest;
8809 r = gen_rtx_PLUS (Pmode, src, offset);
8810 r = gen_rtx_SET (VOIDmode, dest, r);
8811 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8812 RTX_FRAME_RELATED_P (insn) = 1;
8815 RTX_FRAME_RELATED_P (insn) = 1;
8817 if (dest == stack_pointer_rtx)
8819 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8820 bool valid = m->fs.sp_valid;
8822 if (src == hard_frame_pointer_rtx)
8824 valid = m->fs.fp_valid;
8825 ooffset = m->fs.fp_offset;
8827 else if (src == crtl->drap_reg)
8829 valid = m->fs.drap_valid;
8834 /* Else there are two possibilities: SP itself, which we set
   up as the default above, or EH_RETURN_STACKADJ_RTX, which is
   taken care of by hand along the eh_return path. */
8837 gcc_checking_assert (src == stack_pointer_rtx
8838 || offset == const0_rtx);
8841 m->fs.sp_offset = ooffset - INTVAL (offset);
8842 m->fs.sp_valid = valid;
8846 /* Find an available register to be used as dynamic realign argument
   pointer register. Such a register will be written in the prologue
   and used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available. Otherwise,
   we use DI for i386 and R13 for x86-64. We chose R13 since it has
   longer encoding.

   Return: the regno of the chosen register. */
8858 find_drap_reg (void)
8860 tree decl = cfun->decl;
/* Use R13 for a nested function or a function that needs a static
   chain. Since a function with a tail call may use any caller-saved
   register in the epilogue, DRAP must not use a caller-saved
   register in such a case. */
8868 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* Use DI for a nested function or a function that needs a static
   chain. Since a function with a tail call may use any caller-saved
   register in the epilogue, DRAP must not use a caller-saved
   register in such a case. */
8879 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* Reuse the static chain register if it isn't used for parameter
   passing. */
8884 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8885 && !lookup_attribute ("fastcall",
8886 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8887 && !lookup_attribute ("thiscall",
8888 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8895 /* Return minimum incoming stack alignment. */
8898 ix86_minimum_incoming_stack_boundary (bool sibcall)
8900 unsigned int incoming_stack_boundary;
8902 /* Prefer the one specified at command line. */
8903 if (ix86_user_incoming_stack_boundary)
8904 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
/* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
   boundary if -mstackrealign is used, this is not a sibcall check,
   and the estimated stack alignment is 128 bits. */
8910 && ix86_force_align_arg_pointer
8911 && crtl->stack_alignment_estimated == 128)
8912 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8914 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8916 /* Incoming stack alignment can be changed on individual functions
8917 via force_align_arg_pointer attribute. We use the smallest
8918 incoming stack boundary. */
8919 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8920 && lookup_attribute (ix86_force_align_arg_pointer_string,
8921 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8922 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8924 /* The incoming stack frame has to be aligned at least at
8925 parm_stack_boundary. */
8926 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8927 incoming_stack_boundary = crtl->parm_stack_boundary;
8929 /* Stack at entrance of main is aligned by runtime. We use the
8930 smallest incoming stack boundary. */
8931 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8932 && DECL_NAME (current_function_decl)
8933 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8934 && DECL_FILE_SCOPE_P (current_function_decl))
8935 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8937 return incoming_stack_boundary;
8940 /* Update incoming stack boundary and estimated stack alignment. */
8943 ix86_update_stack_boundary (void)
8945 ix86_incoming_stack_boundary
8946 = ix86_minimum_incoming_stack_boundary (false);
/* x86_64 varargs needs 16-byte stack alignment for the register save
   area. */
if (TARGET_64BIT
    && cfun->stdarg
    && crtl->stack_alignment_estimated < 128)
8953 crtl->stack_alignment_estimated = 128;
8956 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8957 needed or an rtx for DRAP otherwise. */
8960 ix86_get_drap_rtx (void)
8962 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8963 crtl->need_drap = true;
8965 if (stack_realign_drap)
/* Assign DRAP to vDRAP and return vDRAP. */
8968 unsigned int regno = find_drap_reg ();
8973 arg_ptr = gen_rtx_REG (Pmode, regno);
8974 crtl->drap_reg = arg_ptr;
8977 drap_vreg = copy_to_reg (arg_ptr);
8981 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8984 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8985 RTX_FRAME_RELATED_P (insn) = 1;
8993 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8996 ix86_internal_arg_pointer (void)
8998 return virtual_incoming_args_rtx;
9001 struct scratch_reg {
9006 /* Return a short-lived scratch register for use on function entry.
9007 In 32-bit mode, it is valid only after the registers are saved
9008 in the prologue. This register must be released by means of
9009 release_scratch_register_on_entry once it is dead. */
9012 get_scratch_register_on_entry (struct scratch_reg *sr)
9020 /* We always use R11 in 64-bit mode. */
9025 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9027 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9028 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9029 int regparm = ix86_function_regparm (fntype, decl);
9031 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9033 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9034 for the static chain register. */
9035 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9036 && drap_regno != AX_REG)
9038 else if (regparm < 2 && drap_regno != DX_REG)
9040 /* ecx is the static chain register. */
9041 else if (regparm < 3 && !fastcall_p && !static_chain_p
9042 && drap_regno != CX_REG)
9044 else if (ix86_save_reg (BX_REG, true))
9046 /* esi is the static chain register. */
9047 else if (!(regparm == 3 && static_chain_p)
9048 && ix86_save_reg (SI_REG, true))
9050 else if (ix86_save_reg (DI_REG, true))
9054 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9059 sr->reg = gen_rtx_REG (Pmode, regno);
9062 rtx insn = emit_insn (gen_push (sr->reg));
9063 RTX_FRAME_RELATED_P (insn) = 1;
9067 /* Release a scratch register obtained from the preceding function. */
9070 release_scratch_register_on_entry (struct scratch_reg *sr)
9074 rtx x, insn = emit_insn (gen_pop (sr->reg));
9076 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9077 RTX_FRAME_RELATED_P (insn) = 1;
9078 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9079 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9080 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9084 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9086 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9089 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9091 /* We skip the probe for the first interval + a small dope of 4 words and
9092 probe that many bytes past the specified size to maintain a protection
area at the bottom of the stack. */
9094 const int dope = 4 * UNITS_PER_WORD;
9095 rtx size_rtx = GEN_INT (size);
9097 /* See if we have a constant small number of probes to generate. If so,
9098 that's the easy case. The run-time loop is made up of 11 insns in the
9099 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9100 for n # of intervals. */
9101 if (size <= 5 * PROBE_INTERVAL)
9103 HOST_WIDE_INT i, adjust;
9104 bool first_probe = true;
9106 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9107 values of N from 1 until it exceeds SIZE. If only one probe is
9108 needed, this will not generate any code. Then adjust and probe
9109 to PROBE_INTERVAL + SIZE. */
9110 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9114 adjust = 2 * PROBE_INTERVAL + dope;
9115 first_probe = false;
9118 adjust = PROBE_INTERVAL;
9120 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9121 plus_constant (stack_pointer_rtx, -adjust)));
9122 emit_stack_probe (stack_pointer_rtx);
9126 adjust = size + PROBE_INTERVAL + dope;
9128 adjust = size + PROBE_INTERVAL - i;
9130 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9131 plus_constant (stack_pointer_rtx, -adjust)));
9132 emit_stack_probe (stack_pointer_rtx);
9134 /* Adjust back to account for the additional first interval. */
9135 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9136 plus_constant (stack_pointer_rtx,
9137 PROBE_INTERVAL + dope)));
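/* For instance (a sketch; PROBE_INTERVAL assumed to be 4096), a
   12288-byte frame is allocated and probed by the block above roughly as

	sub	$8192+dope, %sp		(first interval plus dope folded in)
	or	$0, (%sp)
	sub	$4096, %sp
	or	$0, (%sp)
	sub	$4096, %sp
	or	$0, (%sp)
	add	$4096+dope, %sp		(give back the extra interval)

   leaving the stack pointer 12288 bytes lower with each page probed.  */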
9140 /* Otherwise, do the same as above, but in a loop. Note that we must be
9141 extra careful with variables wrapping around because we might be at
9142 the very top (or the very bottom) of the address space and we have
9143 to be able to handle this case properly; in particular, we use an
9144 equality test for the loop condition. */
9147 HOST_WIDE_INT rounded_size;
9148 struct scratch_reg sr;
9150 get_scratch_register_on_entry (&sr);
9153 /* Step 1: round SIZE to the previous multiple of the interval. */
9155 rounded_size = size & -PROBE_INTERVAL;
9158 /* Step 2: compute initial and final value of the loop counter. */
9160 /* SP = SP_0 + PROBE_INTERVAL. */
9161 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9162 plus_constant (stack_pointer_rtx,
9163 - (PROBE_INTERVAL + dope))));
9165 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9166 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9167 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9168 gen_rtx_PLUS (Pmode, sr.reg,
9169 stack_pointer_rtx)));
9174 while (SP != LAST_ADDR)
9176 SP = SP + PROBE_INTERVAL
9180 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9181 values of N from 1 until it is equal to ROUNDED_SIZE. */
9183 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9186 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9187 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9189 if (size != rounded_size)
9191 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9192 plus_constant (stack_pointer_rtx,
9193 rounded_size - size)));
9194 emit_stack_probe (stack_pointer_rtx);
9197 /* Adjust back to account for the additional first interval. */
9198 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9199 plus_constant (stack_pointer_rtx,
9200 PROBE_INTERVAL + dope)));
9202 release_scratch_register_on_entry (&sr);
9205 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9206 cfun->machine->fs.sp_offset += size;
9208 /* Make sure nothing is scheduled before we are done. */
9209 emit_insn (gen_blockage ());
9212 /* Adjust the stack pointer up to REG while probing it. */
9215 output_adjust_stack_and_probe (rtx reg)
9217 static int labelno = 0;
9218 char loop_lab[32], end_lab[32];
9221 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9222 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9224 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9226 /* Jump to END_LAB if SP == LAST_ADDR. */
9227 xops[0] = stack_pointer_rtx;
9229 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9230 fputs ("\tje\t", asm_out_file);
9231 assemble_name_raw (asm_out_file, end_lab);
9232 fputc ('\n', asm_out_file);
9234 /* SP = SP + PROBE_INTERVAL. */
9235 xops[1] = GEN_INT (PROBE_INTERVAL);
9236 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9239 xops[1] = const0_rtx;
9240 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9242 fprintf (asm_out_file, "\tjmp\t");
9243 assemble_name_raw (asm_out_file, loop_lab);
9244 fputc ('\n', asm_out_file);
9246 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
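/* The loop printed above looks roughly like this (32-bit AT&T syntax,
   PROBE_INTERVAL assumed 4096, %eax as the scratch holding LAST_ADDR):

	LPSRL0:	cmpl	%eax, %esp
		je	LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	LPSRL0
	LPSRE0:  */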
9251 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9252 inclusive. These are offsets from the current stack pointer. */
9255 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9257 /* See if we have a constant small number of probes to generate. If so,
9258 that's the easy case. The run-time loop is made up of 7 insns in the
generic case while the compile-time loop is made up of n insns for n #
of intervals. */
9261 if (size <= 7 * PROBE_INTERVAL)
9265 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9266 it exceeds SIZE. If only one probe is needed, this will not
9267 generate any code. Then probe at FIRST + SIZE. */
9268 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9269 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9271 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9274 /* Otherwise, do the same as above, but in a loop. Note that we must be
9275 extra careful with variables wrapping around because we might be at
9276 the very top (or the very bottom) of the address space and we have
9277 to be able to handle this case properly; in particular, we use an
9278 equality test for the loop condition. */
9281 HOST_WIDE_INT rounded_size, last;
9282 struct scratch_reg sr;
9284 get_scratch_register_on_entry (&sr);
9287 /* Step 1: round SIZE to the previous multiple of the interval. */
9289 rounded_size = size & -PROBE_INTERVAL;
9292 /* Step 2: compute initial and final value of the loop counter. */
9294 /* TEST_OFFSET = FIRST. */
9295 emit_move_insn (sr.reg, GEN_INT (-first));
9297 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9298 last = first + rounded_size;
9303 while (TEST_ADDR != LAST_ADDR)
9305 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9309 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9310 until it is equal to ROUNDED_SIZE. */
9312 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9315 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9316 that SIZE is equal to ROUNDED_SIZE. */
9318 if (size != rounded_size)
9319 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9322 rounded_size - size));
9324 release_scratch_register_on_entry (&sr);
9327 /* Make sure nothing is scheduled before we are done. */
9328 emit_insn (gen_blockage ());
9331 /* Probe a range of stack addresses from REG to END, inclusive. These are
9332 offsets from the current stack pointer. */
9335 output_probe_stack_range (rtx reg, rtx end)
9337 static int labelno = 0;
9338 char loop_lab[32], end_lab[32];
9341 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9342 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9344 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9346 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9349 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9350 fputs ("\tje\t", asm_out_file);
9351 assemble_name_raw (asm_out_file, end_lab);
9352 fputc ('\n', asm_out_file);
9354 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9355 xops[1] = GEN_INT (PROBE_INTERVAL);
9356 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9358 /* Probe at TEST_ADDR. */
9359 xops[0] = stack_pointer_rtx;
9361 xops[2] = const0_rtx;
9362 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9364 fprintf (asm_out_file, "\tjmp\t");
9365 assemble_name_raw (asm_out_file, loop_lab);
9366 fputc ('\n', asm_out_file);
9368 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9373 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9374 to be generated in correct form. */
9376 ix86_finalize_stack_realign_flags (void)
/* Check if stack realignment is really needed after reload, and
   store the result in cfun. */
9380 unsigned int incoming_stack_boundary
9381 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9382 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9383 unsigned int stack_realign = (incoming_stack_boundary
9384 < (current_function_is_leaf
9385 ? crtl->max_used_stack_slot_alignment
9386 : crtl->stack_alignment_needed));
9388 if (crtl->stack_realign_finalized)
/* After stack_realign_needed is finalized, we can no longer
   change it. */
9392 gcc_assert (crtl->stack_realign_needed == stack_realign);
9396 crtl->stack_realign_needed = stack_realign;
9397 crtl->stack_realign_finalized = true;
9401 /* Expand the prologue into a bunch of separate insns. */
9404 ix86_expand_prologue (void)
9406 struct machine_function *m = cfun->machine;
9409 struct ix86_frame frame;
9410 HOST_WIDE_INT allocate;
9411 bool int_registers_saved;
9413 ix86_finalize_stack_realign_flags ();
9415 /* DRAP should not coexist with stack_realign_fp */
9416 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9418 memset (&m->fs, 0, sizeof (m->fs));
9420 /* Initialize CFA state for before the prologue. */
9421 m->fs.cfa_reg = stack_pointer_rtx;
9422 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9424 /* Track SP offset to the CFA. We continue tracking this after we've
9425 swapped the CFA register away from SP. In the case of re-alignment
this is fudged; we're interested in offsets within the local frame. */
9427 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9428 m->fs.sp_valid = true;
9430 ix86_compute_frame_layout (&frame);
9432 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9434 /* We should have already generated an error for any use of
9435 ms_hook on a nested function. */
9436 gcc_checking_assert (!ix86_static_chain_on_stack);
/* Check if profiling is active and we shall use the profiling-before-
   prologue variant. If so, issue a sorry. */
9440 if (crtl->profile && flag_fentry != 0)
9441 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9443 /* In ix86_asm_output_function_label we emitted:
	 8b ff	movl.s %edi,%edi
	 55	push   %ebp
	 8b ec	movl.s %esp,%ebp
9448 This matches the hookable function prologue in Win32 API
9449 functions in Microsoft Windows XP Service Pack 2 and newer.
9450 Wine uses this to enable Windows apps to hook the Win32 API
9451 functions provided by Wine.
9453 What that means is that we've already set up the frame pointer. */
9455 if (frame_pointer_needed
9456 && !(crtl->drap_reg && crtl->stack_realign_needed))
9460 /* We've decided to use the frame pointer already set up.
9461 Describe this to the unwinder by pretending that both
9462 push and mov insns happen right here.
9464 Putting the unwind info here at the end of the ms_hook
9465 is done so that we can make absolutely certain we get
9466 the required byte sequence at the start of the function,
9467 rather than relying on an assembler that can produce
9468 the exact encoding required.
9470 However it does mean (in the unpatched case) that we have
9471 a 1 insn window where the asynchronous unwind info is
9472 incorrect. However, if we placed the unwind info at
9473 its correct location we would have incorrect unwind info
9474 in the patched case. Which is probably all moot since
9475 I don't expect Wine generates dwarf2 unwind info for the
9476 system libraries that use this feature. */
9478 insn = emit_insn (gen_blockage ());
9480 push = gen_push (hard_frame_pointer_rtx);
9481 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9483 RTX_FRAME_RELATED_P (push) = 1;
9484 RTX_FRAME_RELATED_P (mov) = 1;
9486 RTX_FRAME_RELATED_P (insn) = 1;
9487 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9488 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9490 /* Note that gen_push incremented m->fs.cfa_offset, even
9491 though we didn't emit the push insn here. */
9492 m->fs.cfa_reg = hard_frame_pointer_rtx;
9493 m->fs.fp_offset = m->fs.cfa_offset;
9494 m->fs.fp_valid = true;
9498 /* The frame pointer is not needed so pop %ebp again.
9499 This leaves us with a pristine state. */
9500 emit_insn (gen_pop (hard_frame_pointer_rtx));
9504 /* The first insn of a function that accepts its static chain on the
9505 stack is to push the register that would be filled in by a direct
9506 call. This insn will be skipped by the trampoline. */
9507 else if (ix86_static_chain_on_stack)
9509 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9510 emit_insn (gen_blockage ());
9512 /* We don't want to interpret this push insn as a register save,
9513 only as a stack adjustment. The real copy of the register as
9514 a save will be done later, if needed. */
9515 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9516 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9517 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9518 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit prologue code to adjust stack alignment and set up DRAP, in
   case DRAP is needed and stack realignment is really needed after
   reload. */
9523 if (stack_realign_drap)
9525 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9527 /* Only need to push parameter pointer reg if it is caller saved. */
9528 if (!call_used_regs[REGNO (crtl->drap_reg)])
9530 /* Push arg pointer reg */
9531 insn = emit_insn (gen_push (crtl->drap_reg));
9532 RTX_FRAME_RELATED_P (insn) = 1;
9535 /* Grab the argument pointer. */
9536 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9537 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9538 RTX_FRAME_RELATED_P (insn) = 1;
9539 m->fs.cfa_reg = crtl->drap_reg;
9540 m->fs.cfa_offset = 0;
9542 /* Align the stack. */
9543 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9545 GEN_INT (-align_bytes)));
9546 RTX_FRAME_RELATED_P (insn) = 1;
9548 /* Replicate the return address on the stack so that return
9549 address can be reached via (argp - 1) slot. This is needed
9550 to implement macro RETURN_ADDR_RTX and intrinsic function
9551 expand_builtin_return_addr etc. */
9552 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9553 t = gen_frame_mem (Pmode, t);
9554 insn = emit_insn (gen_push (t));
9555 RTX_FRAME_RELATED_P (insn) = 1;
9557 /* For the purposes of frame and register save area addressing,
9558 we've started over with a new frame. */
9559 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9560 m->fs.realigned = true;
9563 if (frame_pointer_needed && !m->fs.fp_valid)
9565 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9566 slower on all targets. Also sdb doesn't like it. */
9567 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9568 RTX_FRAME_RELATED_P (insn) = 1;
9570 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9571 RTX_FRAME_RELATED_P (insn) = 1;
9573 if (m->fs.cfa_reg == stack_pointer_rtx)
9574 m->fs.cfa_reg = hard_frame_pointer_rtx;
9575 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9576 m->fs.fp_offset = m->fs.sp_offset;
9577 m->fs.fp_valid = true;
9580 int_registers_saved = (frame.nregs == 0);
9582 if (!int_registers_saved)
9584 /* If saving registers via PUSH, do so now. */
9585 if (!frame.save_regs_using_mov)
9587 ix86_emit_save_regs ();
9588 int_registers_saved = true;
9589 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9592 /* When using red zone we may start register saving before allocating
9593 the stack frame saving one cycle of the prologue. However, avoid
9594 doing this if we have to probe the stack; at least on x86_64 the
9595 stack probe can turn into a call that clobbers a red zone location. */
9596 else if (ix86_using_red_zone ()
9597 && (! TARGET_STACK_PROBE
9598 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9600 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9601 int_registers_saved = true;
9605 if (stack_realign_fp)
9607 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9608 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9610 /* The computation of the size of the re-aligned stack frame means
9611 that we must allocate the size of the register save area before
9612 performing the actual alignment. Otherwise we cannot guarantee
9613 that there's enough storage above the realignment point. */
9614 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9615 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9616 GEN_INT (m->fs.sp_offset
9617 - frame.sse_reg_save_offset),
9620 /* Align the stack. */
9621 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9623 GEN_INT (-align_bytes)));
9625 /* For the purposes of register save area addressing, the stack
9626 pointer is no longer valid. As for the value of sp_offset,
9627 see ix86_compute_frame_layout, which we need to match in order
9628 to pass verification of stack_pointer_offset at the end. */
9629 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9630 m->fs.sp_valid = false;
9633 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9635 if (flag_stack_usage)
9637 /* We start to count from ARG_POINTER. */
9638 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9640 /* If it was realigned, take into account the fake frame. */
9641 if (stack_realign_drap)
9643 if (ix86_static_chain_on_stack)
9644 stack_size += UNITS_PER_WORD;
9646 if (!call_used_regs[REGNO (crtl->drap_reg)])
9647 stack_size += UNITS_PER_WORD;
9649 /* This over-estimates by 1 minimal-stack-alignment-unit but
9650 mitigates that by counting in the new return address slot. */
9651 current_function_dynamic_stack_size
9652 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9655 current_function_static_stack_size = stack_size;
9658 /* The stack has already been decremented by the instruction calling us
9659 so we need to probe unconditionally to preserve the protection area. */
9660 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9662 /* We expect the registers to be saved when probes are used. */
9663 gcc_assert (int_registers_saved);
9665 if (STACK_CHECK_MOVING_SP)
9667 ix86_adjust_stack_and_probe (allocate);
9672 HOST_WIDE_INT size = allocate;
9674 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9675 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9677 if (TARGET_STACK_PROBE)
9678 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9680 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9686 else if (!ix86_target_stack_probe ()
9687 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9689 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9690 GEN_INT (-allocate), -1,
9691 m->fs.cfa_reg == stack_pointer_rtx);
9695 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9698 if (cfun->machine->call_abi == MS_ABI)
9701 eax_live = ix86_eax_live_at_start_p ();
9705 emit_insn (gen_push (eax));
9706 allocate -= UNITS_PER_WORD;
9709 emit_move_insn (eax, GEN_INT (allocate));
9711 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9713 if (m->fs.cfa_reg == stack_pointer_rtx)
9715 m->fs.cfa_offset += allocate;
9716 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9717 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9718 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9719 RTX_FRAME_RELATED_P (insn) = 1;
9721 m->fs.sp_offset += allocate;
9725 t = choose_baseaddr (m->fs.sp_offset - allocate);
9726 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9729 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9731 if (!int_registers_saved)
9732 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9734 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9736 pic_reg_used = false;
9737 if (pic_offset_table_rtx
9738 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9741 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9743 if (alt_pic_reg_used != INVALID_REGNUM)
9744 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9746 pic_reg_used = true;
9753 if (ix86_cmodel == CM_LARGE_PIC)
9755 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9756 rtx label = gen_label_rtx ();
9758 LABEL_PRESERVE_P (label) = 1;
9759 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9760 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9761 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9762 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9763 pic_offset_table_rtx, tmp_reg));
9766 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9769 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9772 /* In the pic_reg_used case, make sure that the got load isn't deleted
9773 when mcount needs it. Blockage to avoid call movement across mcount
9774 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9776 if (crtl->profile && !flag_fentry && pic_reg_used)
9777 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9779 if (crtl->drap_reg && !crtl->stack_realign_needed)
/* vDRAP is set up, but after reload it turns out stack realignment
   isn't necessary; here we emit the prologue to set up DRAP
   without the stack-realignment adjustment. */
9784 t = choose_baseaddr (0);
9785 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9788 /* Prevent instructions from being scheduled into register save push
9789 sequence when access to the redzone area is done through frame pointer.
9790 The offset between the frame pointer and the stack pointer is calculated
9791 relative to the value of the stack pointer at the end of the function
9792 prologue, and moving instructions that access redzone area via frame
9793 pointer inside push sequence violates this assumption. */
9794 if (frame_pointer_needed && frame.red_zone_size)
9795 emit_insn (gen_memory_blockage ());
9797 /* Emit cld instruction if stringops are used in the function. */
9798 if (TARGET_CLD && ix86_current_function_needs_cld)
9799 emit_insn (gen_cld ());
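/* Putting the pieces above together, the common 32-bit frame-pointer
   prologue that this expands to is simply (a sketch; N and the saved
   registers depend on the computed frame layout):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx			(callee-saved registers, if any)
	subl	$N, %esp		(N = remaining allocation)  */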
9802 /* Emit code to restore REG using a POP insn. */
9805 ix86_emit_restore_reg_using_pop (rtx reg)
9807 struct machine_function *m = cfun->machine;
9808 rtx insn = emit_insn (gen_pop (reg));
9810 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9811 m->fs.sp_offset -= UNITS_PER_WORD;
9813 if (m->fs.cfa_reg == crtl->drap_reg
9814 && REGNO (reg) == REGNO (crtl->drap_reg))
9816 /* Previously we'd represented the CFA as an expression
9817 like *(%ebp - 8). We've just popped that value from
9818 the stack, which means we need to reset the CFA to
9819 the drap register. This will remain until we restore
9820 the stack pointer. */
9821 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9822 RTX_FRAME_RELATED_P (insn) = 1;
9824 /* This means that the DRAP register is valid for addressing too. */
9825 m->fs.drap_valid = true;
9829 if (m->fs.cfa_reg == stack_pointer_rtx)
9831 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9832 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9833 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9834 RTX_FRAME_RELATED_P (insn) = 1;
9836 m->fs.cfa_offset -= UNITS_PER_WORD;
9839 /* When the frame pointer is the CFA, and we pop it, we are
9840 swapping back to the stack pointer as the CFA. This happens
9841 for stack frames that don't allocate other data, so we assume
9842 the stack pointer is now pointing at the return address, i.e.
9843 the function entry state, which makes the offset be 1 word. */
9844 if (reg == hard_frame_pointer_rtx)
9846 m->fs.fp_valid = false;
9847 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9849 m->fs.cfa_reg = stack_pointer_rtx;
9850 m->fs.cfa_offset -= UNITS_PER_WORD;
9852 add_reg_note (insn, REG_CFA_DEF_CFA,
9853 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9854 GEN_INT (m->fs.cfa_offset)));
9855 RTX_FRAME_RELATED_P (insn) = 1;
9860 /* Emit code to restore saved registers using POP insns. */
9863 ix86_emit_restore_regs_using_pop (void)
9867 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9868 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9869 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9872 /* Emit code and notes for the LEAVE instruction. */
9875 ix86_emit_leave (void)
9877 struct machine_function *m = cfun->machine;
9878 rtx insn = emit_insn (ix86_gen_leave ());
9880 ix86_add_queued_cfa_restore_notes (insn);
9882 gcc_assert (m->fs.fp_valid);
9883 m->fs.sp_valid = true;
9884 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9885 m->fs.fp_valid = false;
9887 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9889 m->fs.cfa_reg = stack_pointer_rtx;
9890 m->fs.cfa_offset = m->fs.sp_offset;
9892 add_reg_note (insn, REG_CFA_DEF_CFA,
9893 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9894 RTX_FRAME_RELATED_P (insn) = 1;
ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                           m->fs.fp_offset);
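/* "leave" is architecturally equivalent to "movl %ebp, %esp; popl %ebp",
   which is why the state above ends with SP valid just below the old FP
   save slot and the frame pointer no longer valid.  */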
9900 /* Emit code to restore saved registers using MOV insns.
9901 First register is restored from CFA - CFA_OFFSET. */
9903 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9904 int maybe_eh_return)
9906 struct machine_function *m = cfun->machine;
9909 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9910 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9912 rtx reg = gen_rtx_REG (Pmode, regno);
9915 mem = choose_baseaddr (cfa_offset);
9916 mem = gen_frame_mem (Pmode, mem);
9917 insn = emit_move_insn (reg, mem);
9919 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9921 /* Previously we'd represented the CFA as an expression
9922 like *(%ebp - 8). We've just popped that value from
9923 the stack, which means we need to reset the CFA to
9924 the drap register. This will remain until we restore
9925 the stack pointer. */
9926 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9927 RTX_FRAME_RELATED_P (insn) = 1;
9929 /* This means that the DRAP register is valid for addressing. */
9930 m->fs.drap_valid = true;
9933 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9935 cfa_offset -= UNITS_PER_WORD;
9939 /* Emit code to restore saved registers using MOV insns.
9940 First register is restored from CFA - CFA_OFFSET. */
9942 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9943 int maybe_eh_return)
9947 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9948 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9950 rtx reg = gen_rtx_REG (V4SFmode, regno);
9953 mem = choose_baseaddr (cfa_offset);
9954 mem = gen_rtx_MEM (V4SFmode, mem);
9955 set_mem_align (mem, 128);
9956 emit_move_insn (reg, mem);
9958 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9964 /* Restore function stack, frame, and registers. */
9967 ix86_expand_epilogue (int style)
9969 struct machine_function *m = cfun->machine;
9970 struct machine_frame_state frame_state_save = m->fs;
9971 struct ix86_frame frame;
9972 bool restore_regs_via_mov;
9975 ix86_finalize_stack_realign_flags ();
9976 ix86_compute_frame_layout (&frame);
9978 m->fs.sp_valid = (!frame_pointer_needed
9979 || (current_function_sp_is_unchanging
9980 && !stack_realign_fp));
9981 gcc_assert (!m->fs.sp_valid
9982 || m->fs.sp_offset == frame.stack_pointer_offset);
9984 /* The FP must be valid if the frame pointer is present. */
9985 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9986 gcc_assert (!m->fs.fp_valid
9987 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9989 /* We must have *some* valid pointer to the stack frame. */
9990 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9992 /* The DRAP is never valid at this point. */
9993 gcc_assert (!m->fs.drap_valid);
9995 /* See the comment about red zone and frame
9996 pointer usage in ix86_expand_prologue. */
9997 if (frame_pointer_needed && frame.red_zone_size)
9998 emit_insn (gen_memory_blockage ());
10000 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10001 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10003 /* Determine the CFA offset of the end of the red-zone. */
10004 m->fs.red_zone_offset = 0;
10005 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10007 /* The red-zone begins below the return address. */
10008 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10010 /* When the register save area is in the aligned portion of
10011 the stack, determine the maximum runtime displacement that
10012 matches up with the aligned frame. */
10013 if (stack_realign_drap)
10014 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10018 /* Special care must be taken for the normal return case of a function
10019 using eh_return: the eax and edx registers are marked as saved, but
10020 not restored along this path. Adjust the save location to match. */
10021 if (crtl->calls_eh_return && style != 2)
10022 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* If we're only restoring one register and sp is not valid, then
   use a move instruction to restore the register, since it's
   less work than reloading sp and popping the register. */
10027 if (!m->fs.sp_valid && frame.nregs <= 1)
10028 restore_regs_via_mov = true;
10029 /* EH_RETURN requires the use of moves to function properly. */
10030 else if (crtl->calls_eh_return)
10031 restore_regs_via_mov = true;
10032 else if (TARGET_EPILOGUE_USING_MOVE
10033 && cfun->machine->use_fast_prologue_epilogue
10034 && (frame.nregs > 1
10035 || m->fs.sp_offset != frame.reg_save_offset))
10036 restore_regs_via_mov = true;
10037 else if (frame_pointer_needed
10039 && m->fs.sp_offset != frame.reg_save_offset)
10040 restore_regs_via_mov = true;
10041 else if (frame_pointer_needed
10042 && TARGET_USE_LEAVE
10043 && cfun->machine->use_fast_prologue_epilogue
10044 && frame.nregs == 1)
10045 restore_regs_via_mov = true;
10047 restore_regs_via_mov = false;
10049 if (restore_regs_via_mov || frame.nsseregs)
10051 /* Ensure that the entire register save area is addressable via
10052 the stack pointer, if we will restore via sp. */
10054 && m->fs.sp_offset > 0x7fffffff
10055 && !(m->fs.fp_valid || m->fs.drap_valid)
10056 && (frame.nsseregs + frame.nregs) != 0)
10058 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10059 GEN_INT (m->fs.sp_offset
10060 - frame.sse_reg_save_offset),
10062 m->fs.cfa_reg == stack_pointer_rtx);
10066 /* If there are any SSE registers to restore, then we have to do it
10067 via moves, since there's obviously no pop for SSE regs. */
10068 if (frame.nsseregs)
10069 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10070 style == 2);
10072 if (restore_regs_via_mov)
10074 rtx t;
10076 if (frame.nregs)
10077 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10079 /* eh_return epilogues need %ecx added to the stack pointer. */
10080 if (style == 2)
10082 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10084 /* Stack align doesn't work with eh_return. */
10085 gcc_assert (!stack_realign_drap);
10086 /* Neither do regparm nested functions. */
10087 gcc_assert (!ix86_static_chain_on_stack);
10089 if (frame_pointer_needed)
10091 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10092 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10093 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10095 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10096 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10098 /* Note that we use SA as a temporary CFA, as the return
10099 address is at the proper place relative to it. We
10100 pretend this happens at the FP restore insn because
10101 prior to this insn the FP would be stored at the wrong
10102 offset relative to SA, and after this insn we have no
10103 other reasonable register to use for the CFA. We don't
10104 bother resetting the CFA to the SP for the duration of
10105 the return insn. */
10106 add_reg_note (insn, REG_CFA_DEF_CFA,
10107 plus_constant (sa, UNITS_PER_WORD));
10108 ix86_add_queued_cfa_restore_notes (insn);
10109 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10110 RTX_FRAME_RELATED_P (insn) = 1;
10112 m->fs.cfa_reg = sa;
10113 m->fs.cfa_offset = UNITS_PER_WORD;
10114 m->fs.fp_valid = false;
10116 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10117 const0_rtx, style, false);
10119 else
10121 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10122 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10123 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10124 ix86_add_queued_cfa_restore_notes (insn);
10126 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10127 if (m->fs.cfa_offset != UNITS_PER_WORD)
10129 m->fs.cfa_offset = UNITS_PER_WORD;
10130 add_reg_note (insn, REG_CFA_DEF_CFA,
10131 plus_constant (stack_pointer_rtx,
10132 UNITS_PER_WORD));
10133 RTX_FRAME_RELATED_P (insn) = 1;
10136 m->fs.sp_offset = UNITS_PER_WORD;
10137 m->fs.sp_valid = true;
10140 else
10142 /* First step is to deallocate the stack frame so that we can
10143 pop the registers. */
10144 if (!m->fs.sp_valid)
10146 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10147 GEN_INT (m->fs.fp_offset
10148 - frame.reg_save_offset),
10149 style, false);
10151 else if (m->fs.sp_offset != frame.reg_save_offset)
10153 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10154 GEN_INT (m->fs.sp_offset
10155 - frame.reg_save_offset),
10156 style,
10157 m->fs.cfa_reg == stack_pointer_rtx);
10160 ix86_emit_restore_regs_using_pop ();
10163 /* If we used a stack pointer and haven't already got rid of it,
10164 then do so now. */
10165 if (m->fs.fp_valid)
10167 /* If the stack pointer is valid and pointing at the frame
10168 pointer store address, then we only need a pop. */
10169 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10170 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10171 /* Leave results in shorter dependency chains on CPUs that are
10172 able to grok it fast. */
10173 else if (TARGET_USE_LEAVE
10174 || optimize_function_for_size_p (cfun)
10175 || !cfun->machine->use_fast_prologue_epilogue)
10176 ix86_emit_leave ();
10178 else
10179 pro_epilogue_adjust_stack (stack_pointer_rtx,
10180 hard_frame_pointer_rtx,
10181 const0_rtx, style, !using_drap);
10182 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10186 if (using_drap)
10188 int param_ptr_offset = UNITS_PER_WORD;
10189 rtx insn;
10191 gcc_assert (stack_realign_drap);
10193 if (ix86_static_chain_on_stack)
10194 param_ptr_offset += UNITS_PER_WORD;
10195 if (!call_used_regs[REGNO (crtl->drap_reg)])
10196 param_ptr_offset += UNITS_PER_WORD;
10198 insn = emit_insn (gen_rtx_SET
10199 (VOIDmode, stack_pointer_rtx,
10200 gen_rtx_PLUS (Pmode,
10201 crtl->drap_reg,
10202 GEN_INT (-param_ptr_offset))));
10203 m->fs.cfa_reg = stack_pointer_rtx;
10204 m->fs.cfa_offset = param_ptr_offset;
10205 m->fs.sp_offset = param_ptr_offset;
10206 m->fs.realigned = false;
10208 add_reg_note (insn, REG_CFA_DEF_CFA,
10209 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10210 GEN_INT (param_ptr_offset)));
10211 RTX_FRAME_RELATED_P (insn) = 1;
10213 if (!call_used_regs[REGNO (crtl->drap_reg)])
10214 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10217 /* At this point the stack pointer must be valid, and we must have
10218 restored all of the registers. We may not have deallocated the
10219 entire stack frame. We've delayed this until now because it may
10220 be possible to merge the local stack deallocation with the
10221 deallocation forced by ix86_static_chain_on_stack. */
10222 gcc_assert (m->fs.sp_valid);
10223 gcc_assert (!m->fs.fp_valid);
10224 gcc_assert (!m->fs.realigned);
10225 if (m->fs.sp_offset != UNITS_PER_WORD)
10227 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10228 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10229 style, true);
10232 /* Sibcall epilogues don't want a return instruction. */
10233 if (style == 0)
10235 m->fs = frame_state_save;
10236 return;
10239 if (crtl->args.pops_args && crtl->args.size)
10241 rtx popc = GEN_INT (crtl->args.pops_args);
10243 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10244 address, do explicit add, and jump indirectly to the caller. */
10246 if (crtl->args.pops_args >= 65536)
10248 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10249 rtx insn;
10251 /* There is no "pascal" calling convention in any 64bit ABI. */
10252 gcc_assert (!TARGET_64BIT);
10254 insn = emit_insn (gen_pop (ecx));
10255 m->fs.cfa_offset -= UNITS_PER_WORD;
10256 m->fs.sp_offset -= UNITS_PER_WORD;
10258 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10259 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10260 add_reg_note (insn, REG_CFA_REGISTER,
10261 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10262 RTX_FRAME_RELATED_P (insn) = 1;
10264 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10266 emit_jump_insn (gen_return_indirect_internal (ecx));
10268 else
10269 emit_jump_insn (gen_return_pop_internal (popc));
10271 else
10272 emit_jump_insn (gen_return_internal ());
10274 /* Restore the state back to the state from the prologue,
10275 so that it's correct for the next epilogue. */
10276 m->fs = frame_state_save;
10279 /* Reset from the function's potential modifications. */
10281 static void
10282 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10283 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10285 if (pic_offset_table_rtx)
10286 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10288 /* Mach-O doesn't support labels at the end of objects, so if
10289 it looks like we might want one, insert a NOP. */
10291 rtx insn = get_last_insn ();
10292 while (insn
10293 && NOTE_P (insn)
10294 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10295 insn = PREV_INSN (insn);
10296 if (insn
10297 && (LABEL_P (insn)
10298 || (NOTE_P (insn)
10299 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10300 fputs ("\tnop\n", file);
10306 /* Extract the parts of an RTL expression that is a valid memory address
10307 for an instruction. Return 0 if the structure of the address is
10308 grossly off. Return -1 if the address contains ASHIFT, so it is not
10309 strictly valid, but still used for computing the length of an lea instruction. */
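/* For example, the SIB address in "movl 4(%ebx,%ecx,2), %eax"
   decomposes into base %ebx, index %ecx, scale 2 and disp 4.  */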
10312 ix86_decompose_address (rtx addr, struct ix86_address *out)
10314 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10315 rtx base_reg, index_reg;
10316 HOST_WIDE_INT scale = 1;
10317 rtx scale_rtx = NULL_RTX;
10320 enum ix86_address_seg seg = SEG_DEFAULT;
10322 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10324 else if (GET_CODE (addr) == PLUS)
10326 rtx addends[4], op;
10334 addends[n++] = XEXP (op, 1);
10337 while (GET_CODE (op) == PLUS);
10342 for (i = n; i >= 0; --i)
10345 switch (GET_CODE (op))
10350 index = XEXP (op, 0);
10351 scale_rtx = XEXP (op, 1);
10357 index = XEXP (op, 0);
10358 tmp = XEXP (op, 1);
10359 if (!CONST_INT_P (tmp))
10361 scale = INTVAL (tmp);
10362 if ((unsigned HOST_WIDE_INT) scale > 3)
10364 scale = 1 << scale;
10368 if (XINT (op, 1) == UNSPEC_TP
10369 && TARGET_TLS_DIRECT_SEG_REFS
10370 && seg == SEG_DEFAULT)
10371 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10400 else if (GET_CODE (addr) == MULT)
10402 index = XEXP (addr, 0); /* index*scale */
10403 scale_rtx = XEXP (addr, 1);
10405 else if (GET_CODE (addr) == ASHIFT)
10407 /* We're called for lea too, which implements ashift on occasion. */
10408 index = XEXP (addr, 0);
10409 tmp = XEXP (addr, 1);
10410 if (!CONST_INT_P (tmp))
10412 scale = INTVAL (tmp);
10413 if ((unsigned HOST_WIDE_INT) scale > 3)
10415 scale = 1 << scale;
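      /* E.g. (ashift (reg) (const_int 3)) coming from an lea pattern is
	 treated here exactly like (mult (reg) (const_int 8)).  */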
10419 disp = addr; /* displacement */
10421 /* Extract the integral value of scale. */
10424 if (!CONST_INT_P (scale_rtx))
10426 scale = INTVAL (scale_rtx);
10429 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10430 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10432 /* Avoid useless 0 displacement. */
10433 if (disp == const0_rtx && (base || index))
10436 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10437 if (base_reg && index_reg && scale == 1
10438 && (index_reg == arg_pointer_rtx
10439 || index_reg == frame_pointer_rtx
10440 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10443 tmp = base, base = index, index = tmp;
10444 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10447 /* Special case: %ebp cannot be encoded as a base without a displacement.
10448 Similarly %r13. */
10450 if (!disp && base_reg
10451 && (base_reg == hard_frame_pointer_rtx
10452 || base_reg == frame_pointer_rtx
10453 || base_reg == arg_pointer_rtx
10454 || (REG_P (base_reg)
10455 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10456 || REGNO (base_reg) == R13_REG))))
10457 disp = const0_rtx;
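  /* (In the ModR/M byte, mod=00 with base 101b means "disp32, no base",
     so a plain (%ebp) or (%r13) reference must be assembled with an
     explicit zero displacement, e.g. 0(%ebp).)  */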
10459 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
10460 Avoid this by transforming to [%esi+0].
10461 Reload calls address legitimization without cfun defined, so we need
10462 to test cfun for being non-NULL. */
10463 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10464 && base_reg && !index_reg && !disp
10465 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10466 disp = const0_rtx;
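  /* (E.g. "movl (%esi), %eax" is emitted as "movl 0(%esi), %eax";
     the extra 8-bit zero displacement avoids the vector decode.)  */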
10468 /* Special case: encode reg+reg instead of reg*2. */
10469 if (!base && index && scale == 2)
10470 base = index, base_reg = index_reg, scale = 1;
10472 /* Special case: scaling cannot be encoded without base or displacement. */
10473 if (!base && !disp && index && scale != 1)
10474 disp = const0_rtx;
10476 out->base = base;
10477 out->index = index;
10478 out->disp = disp;
10479 out->scale = scale;
10480 out->seg = seg;
10482 return retval;
10485 /* Return cost of the memory address x.
10486 For i386, it is better to use a complex address than let gcc copy
10487 the address into a reg and make a new pseudo. But not if the address
10488 requires two regs - that would mean more pseudos with longer
10489 lifetimes. */
10490 static int
10491 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10493 struct ix86_address parts;
10494 int cost = 1;
10495 int ok = ix86_decompose_address (x, &parts);
10497 gcc_assert (ok);
10499 if (parts.base && GET_CODE (parts.base) == SUBREG)
10500 parts.base = SUBREG_REG (parts.base);
10501 if (parts.index && GET_CODE (parts.index) == SUBREG)
10502 parts.index = SUBREG_REG (parts.index);
10504 /* Attempt to minimize number of registers in the address. */
10505 if ((parts.base
10506 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10507 || (parts.index
10508 && (!REG_P (parts.index)
10509 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10510 cost++;
10512 if (parts.base
10513 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10514 && parts.index
10515 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10516 && parts.base != parts.index)
10517 cost++;
10519 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10520 since its predecode logic can't detect the length of instructions
10521 and it degenerates to vector decoding. Increase the cost of such
10522 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10523 to split such addresses or even refuse such addresses at all.
10525 Following addressing modes are affected:
10526 [base+scale*index]
10527 [scale*index+disp]
10528 [base+index]
10530 The first and last case may be avoidable by explicitly coding the zero in
10531 memory address, but I don't have an AMD-K6 machine handy to check this
10532 theory. */
10534 if (TARGET_K6
10535 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10536 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10537 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10538 cost += 10;
10540 return cost;
10543 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10544 this is used to form addresses to local data when -fPIC is in
10545 effect. */
10547 static bool
10548 darwin_local_data_pic (rtx disp)
10550 return (GET_CODE (disp) == UNSPEC
10551 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10554 /* Determine if a given RTX is a valid constant. We already know this
10555 satisfies CONSTANT_P. */
10558 legitimate_constant_p (rtx x)
10560 switch (GET_CODE (x))
10562 case CONST:
10563 x = XEXP (x, 0);
10565 if (GET_CODE (x) == PLUS)
10567 if (!CONST_INT_P (XEXP (x, 1)))
10568 return false;
10569 x = XEXP (x, 0);
10572 if (TARGET_MACHO && darwin_local_data_pic (x))
10573 return true;
10575 /* Only some unspecs are valid as "constants". */
10576 if (GET_CODE (x) == UNSPEC)
10577 switch (XINT (x, 1))
10580 case UNSPEC_GOTOFF:
10581 case UNSPEC_PLTOFF:
10582 return TARGET_64BIT;
10584 case UNSPEC_NTPOFF:
10585 x = XVECEXP (x, 0, 0);
10586 return (GET_CODE (x) == SYMBOL_REF
10587 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10588 case UNSPEC_DTPOFF:
10589 x = XVECEXP (x, 0, 0);
10590 return (GET_CODE (x) == SYMBOL_REF
10591 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10596 /* We must have drilled down to a symbol. */
10597 if (GET_CODE (x) == LABEL_REF)
10598 return true;
10599 if (GET_CODE (x) != SYMBOL_REF)
10600 return false;
10604 /* TLS symbols are never valid. */
10605 if (SYMBOL_REF_TLS_MODEL (x))
10606 return false;
10608 /* DLLIMPORT symbols are never valid. */
10609 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10610 && SYMBOL_REF_DLLIMPORT_P (x))
10611 return false;
10615 if (GET_MODE (x) == TImode
10616 && x != CONST0_RTX (TImode)
10617 && !TARGET_64BIT)
10618 return false;
10619 break;
10621 case CONST_VECTOR:
10622 if (!standard_sse_constant_p (x))
10623 return false;
10629 /* Otherwise we handle everything else in the move patterns. */
10630 return true;
10633 /* Determine if it's legal to put X into the constant pool. This
10634 is not possible for the address of thread-local symbols, which
10635 is checked above. */
10637 static bool
10638 ix86_cannot_force_const_mem (rtx x)
10640 /* We can always put integral constants and vectors in memory. */
10641 switch (GET_CODE (x))
10643 case CONST_INT:
10644 case CONST_DOUBLE:
10645 case CONST_VECTOR:
10646 return false;
10648 default:
10651 return !legitimate_constant_p (x);
10655 /* Nonzero if the constant value X is a legitimate general operand
10656 when generating PIC code. It is given that flag_pic is on and
10657 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10660 legitimate_pic_operand_p (rtx x)
10662 rtx inner;
10664 switch (GET_CODE (x))
10666 case CONST:
10667 inner = XEXP (x, 0);
10668 if (GET_CODE (inner) == PLUS
10669 && CONST_INT_P (XEXP (inner, 1)))
10670 inner = XEXP (inner, 0);
10672 /* Only some unspecs are valid as "constants". */
10673 if (GET_CODE (inner) == UNSPEC)
10674 switch (XINT (inner, 1))
10677 case UNSPEC_GOTOFF:
10678 case UNSPEC_PLTOFF:
10679 return TARGET_64BIT;
10680 case UNSPEC_TPOFF:
10681 x = XVECEXP (inner, 0, 0);
10682 return (GET_CODE (x) == SYMBOL_REF
10683 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10684 case UNSPEC_MACHOPIC_OFFSET:
10685 return legitimate_pic_address_disp_p (x);
10686 default:
10687 return false;
10689 /* FALLTHRU */
10691 case SYMBOL_REF:
10692 case LABEL_REF:
10693 return legitimate_pic_address_disp_p (x);
10695 default:
10696 return true;
10700 /* Determine if a given CONST RTX is a valid memory displacement
10701 in PIC mode. */
10703 bool
10704 legitimate_pic_address_disp_p (rtx disp)
10708 /* In 64bit mode we can allow direct addresses of symbols and labels
10709 when they are not dynamic symbols. */
10712 rtx op0 = disp, op1;
10714 switch (GET_CODE (disp))
10720 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10722 op0 = XEXP (XEXP (disp, 0), 0);
10723 op1 = XEXP (XEXP (disp, 0), 1);
10724 if (!CONST_INT_P (op1)
10725 || INTVAL (op1) >= 16*1024*1024
10726 || INTVAL (op1) < -16*1024*1024)
10728 if (GET_CODE (op0) == LABEL_REF)
10730 if (GET_CODE (op0) != SYMBOL_REF)
10735 /* TLS references should always be enclosed in UNSPEC. */
10736 if (SYMBOL_REF_TLS_MODEL (op0))
10738 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10739 && ix86_cmodel != CM_LARGE_PIC)
10747 if (GET_CODE (disp) != CONST)
10749 disp = XEXP (disp, 0);
10753 /* It is unsafe to allow PLUS expressions here; they would permit
10754 addressing at an arbitrary distance from the GOT table. We should not need these anyway. */
10755 if (GET_CODE (disp) != UNSPEC
10756 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10757 && XINT (disp, 1) != UNSPEC_GOTOFF
10758 && XINT (disp, 1) != UNSPEC_PLTOFF))
10761 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10762 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10768 if (GET_CODE (disp) == PLUS)
10770 if (!CONST_INT_P (XEXP (disp, 1)))
10772 disp = XEXP (disp, 0);
10776 if (TARGET_MACHO && darwin_local_data_pic (disp))
10779 if (GET_CODE (disp) != UNSPEC)
10782 switch (XINT (disp, 1))
10787 /* We need to check for both symbols and labels because VxWorks loads
10788 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10789 details. */
10790 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10791 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10792 case UNSPEC_GOTOFF:
10793 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10794 While the ABI also specifies a 32bit relocation, we don't produce it in
10795 the small PIC model at all. */
10796 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10797 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10799 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10801 case UNSPEC_GOTTPOFF:
10802 case UNSPEC_GOTNTPOFF:
10803 case UNSPEC_INDNTPOFF:
10806 disp = XVECEXP (disp, 0, 0);
10807 return (GET_CODE (disp) == SYMBOL_REF
10808 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10809 case UNSPEC_NTPOFF:
10810 disp = XVECEXP (disp, 0, 0);
10811 return (GET_CODE (disp) == SYMBOL_REF
10812 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10813 case UNSPEC_DTPOFF:
10814 disp = XVECEXP (disp, 0, 0);
10815 return (GET_CODE (disp) == SYMBOL_REF
10816 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10822 /* Recognizes RTL expressions that are valid memory addresses for an
10823 instruction. The MODE argument is the machine mode for the MEM
10824 expression that wants to use this address.
10826 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10827 convert common non-canonical forms to canonical form so that they will
10828 be recognized. */
10830 static bool
10831 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10832 rtx addr, bool strict)
10834 struct ix86_address parts;
10835 rtx base, index, disp;
10836 HOST_WIDE_INT scale;
10838 if (ix86_decompose_address (addr, &parts) <= 0)
10839 /* Decomposition failed. */
10840 return false;
10842 base = parts.base;
10843 index = parts.index;
10844 disp = parts.disp;
10845 scale = parts.scale;
10847 /* Validate base register.
10849 Don't allow SUBREG's that span more than a word here. It can lead to spill
10850 failures when the base is one word out of a two word structure, which is
10851 represented internally as a DImode int. */
10859 else if (GET_CODE (base) == SUBREG
10860 && REG_P (SUBREG_REG (base))
10861 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10862 <= UNITS_PER_WORD)
10863 reg = SUBREG_REG (base);
10864 else
10865 /* Base is not a register. */
10866 return false;
10868 if (GET_MODE (base) != Pmode)
10869 /* Base is not in Pmode. */
10870 return false;
10872 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10873 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10874 /* Base is not valid. */
10875 return false;
10878 /* Validate index register.
10880 Don't allow SUBREG's that span more than a word here -- same as above. */
10888 else if (GET_CODE (index) == SUBREG
10889 && REG_P (SUBREG_REG (index))
10890 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10891 <= UNITS_PER_WORD)
10892 reg = SUBREG_REG (index);
10893 else
10894 /* Index is not a register. */
10895 return false;
10897 if (GET_MODE (index) != Pmode)
10898 /* Index is not in Pmode. */
10899 return false;
10901 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10902 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10903 /* Index is not valid. */
10904 return false;
10907 /* Validate scale factor. */
10910 if (!index)
10911 /* Scale without index. */
10912 return false;
10914 if (scale != 2 && scale != 4 && scale != 8)
10915 /* Scale is not a valid multiplier. */
10916 return false;
10919 /* Validate displacement. */
10920 if (disp)
10922 if (GET_CODE (disp) == CONST
10923 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10924 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10925 switch (XINT (XEXP (disp, 0), 1))
10927 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10928 used. While the ABI also specifies 32bit relocations, we don't produce
10929 them at all and use IP relative addressing instead. */
10930 case UNSPEC_GOT:
10931 case UNSPEC_GOTOFF:
10932 gcc_assert (flag_pic);
10933 if (!TARGET_64BIT)
10934 goto is_legitimate_pic;
10936 /* 64bit address unspec. */
10939 case UNSPEC_GOTPCREL:
10940 gcc_assert (flag_pic);
10941 goto is_legitimate_pic;
10943 case UNSPEC_GOTTPOFF:
10944 case UNSPEC_GOTNTPOFF:
10945 case UNSPEC_INDNTPOFF:
10946 case UNSPEC_NTPOFF:
10947 case UNSPEC_DTPOFF:
10948 break;
10950 default:
10951 /* Invalid address unspec. */
10952 return false;
10955 else if (SYMBOLIC_CONST (disp)
10959 && MACHOPIC_INDIRECT
10960 && !machopic_operand_p (disp)
10966 if (TARGET_64BIT && (index || base))
10968 /* foo@dtpoff(%rX) is ok. */
10969 if (GET_CODE (disp) != CONST
10970 || GET_CODE (XEXP (disp, 0)) != PLUS
10971 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10972 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10973 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10974 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10975 /* Non-constant pic memory reference. */
10976 return false;
10978 else if (! legitimate_pic_address_disp_p (disp))
10979 /* Displacement is an invalid pic construct. */
10980 return false;
10982 /* This code used to verify that a symbolic pic displacement
10983 includes the pic_offset_table_rtx register.
10985 While this is a good idea, unfortunately these constructs may
10986 be created by "adds using lea" optimization for incorrect
10987 code like:
10989 static int a;
10990 int foo(int i)
10992 return *(&a+i);
10995 This code is nonsensical, but results in addressing
10996 GOT table with pic_offset_table_rtx base. We can't
10997 just refuse it easily, since it gets matched by
10998 "addsi3" pattern, that later gets split to lea in the
10999 case the output register differs from the input. While this
11000 can be handled by separate addsi pattern for this case
11001 that never results in lea, this seems to be easier and
11002 correct fix for crash to disable this test. */
11004 else if (GET_CODE (disp) != LABEL_REF
11005 && !CONST_INT_P (disp)
11006 && (GET_CODE (disp) != CONST
11007 || !legitimate_constant_p (disp))
11008 && (GET_CODE (disp) != SYMBOL_REF
11009 || !legitimate_constant_p (disp)))
11010 /* Displacement is not constant. */
11012 else if (TARGET_64BIT
11013 && !x86_64_immediate_operand (disp, VOIDmode))
11014 /* Displacement is out of range. */
11018 /* Everything looks valid. */
11022 /* Determine if a given RTX is a valid constant address. */
11025 constant_address_p (rtx x)
11027 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11030 /* Return a unique alias set for the GOT. */
11032 static alias_set_type
11033 ix86_GOT_alias_set (void)
11035 static alias_set_type set = -1;
11036 if (set == -1)
11037 set = new_alias_set ();
11038 return set;
11041 /* Return a legitimate reference for ORIG (an address) using the
11042 register REG. If REG is 0, a new pseudo is generated.
11044 There are two types of references that must be handled:
11046 1. Global data references must load the address from the GOT, via
11047 the PIC reg. An insn is emitted to do this load, and the reg is
11048 returned.
11050 2. Static data references, constant pool addresses, and code labels
11051 compute the address as an offset from the GOT, whose base is in
11052 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11053 differentiate them from global data objects. The returned
11054 address is the PIC reg + an unspec constant.
11056 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11057 reg also appears in the address. */
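/* For example, on ia32 a global symbol is loaded through the GOT,
       movl  foo@GOT(%ebx), %eax
   while a local symbol is addressed relative to the GOT base,
       leal  bar@GOTOFF(%ebx), %eax  */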
11059 static rtx
11060 legitimize_pic_address (rtx orig, rtx reg)
11062 rtx addr = orig;
11063 rtx new_rtx = orig;
11067 if (TARGET_MACHO && !TARGET_64BIT)
11069 if (reg == 0)
11070 reg = gen_reg_rtx (Pmode);
11071 /* Use the generic Mach-O PIC machinery. */
11072 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11076 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11077 new_rtx = addr;
11078 else if (TARGET_64BIT
11079 && ix86_cmodel != CM_SMALL_PIC
11080 && gotoff_operand (addr, Pmode))
11082 rtx tmpreg;
11083 /* This symbol may be referenced via a displacement from the PIC
11084 base address (@GOTOFF). */
11086 if (reload_in_progress)
11087 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11088 if (GET_CODE (addr) == CONST)
11089 addr = XEXP (addr, 0);
11090 if (GET_CODE (addr) == PLUS)
11092 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11093 UNSPEC_GOTOFF);
11094 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11096 else
11097 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11098 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11100 tmpreg = gen_reg_rtx (Pmode);
11103 emit_move_insn (tmpreg, new_rtx);
11105 if (reg != 0)
11107 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11108 tmpreg, 1, OPTAB_DIRECT);
11109 new_rtx = reg;
11110 else
11111 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11113 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11115 /* This symbol may be referenced via a displacement from the PIC
11116 base address (@GOTOFF). */
11118 if (reload_in_progress)
11119 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11120 if (GET_CODE (addr) == CONST)
11121 addr = XEXP (addr, 0);
11122 if (GET_CODE (addr) == PLUS)
11124 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11125 UNSPEC_GOTOFF);
11126 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11128 else
11129 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11130 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11131 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11133 if (reg != 0)
11135 emit_move_insn (reg, new_rtx);
11136 new_rtx = reg;
11139 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11140 /* We can't use @GOTOFF for text labels on VxWorks;
11141 see gotoff_operand. */
11142 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11144 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11146 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11147 return legitimize_dllimport_symbol (addr, true);
11148 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11149 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11150 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11152 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11153 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11157 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11159 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11160 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11161 new_rtx = gen_const_mem (Pmode, new_rtx);
11162 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11164 if (reg == 0)
11165 reg = gen_reg_rtx (Pmode);
11166 /* Use gen_movsi directly; otherwise the address is loaded
11167 into a register for CSE. We don't want to CSE these addresses,
11168 instead we CSE addresses from the GOT table, so skip this. */
11169 emit_insn (gen_movsi (reg, new_rtx));
11170 new_rtx = reg;
11174 /* This symbol must be referenced via a load from the
11175 Global Offset Table (@GOT). */
11177 if (reload_in_progress)
11178 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11179 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11180 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11181 if (TARGET_64BIT)
11182 new_rtx = force_reg (Pmode, new_rtx);
11183 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11184 new_rtx = gen_const_mem (Pmode, new_rtx);
11185 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11187 if (reg == 0)
11188 reg = gen_reg_rtx (Pmode);
11189 emit_move_insn (reg, new_rtx);
11190 new_rtx = reg;
11193 else
11195 if (CONST_INT_P (addr)
11196 && !x86_64_immediate_operand (addr, VOIDmode))
11198 if (reg)
11200 emit_move_insn (reg, addr);
11201 new_rtx = reg;
11203 else
11204 new_rtx = force_reg (Pmode, addr);
11206 else if (GET_CODE (addr) == CONST)
11208 addr = XEXP (addr, 0);
11210 /* We must match stuff we generate before. Assume the only
11211 unspecs that can get here are ours. Not that we could do
11212 anything with them anyway.... */
11213 if (GET_CODE (addr) == UNSPEC
11214 || (GET_CODE (addr) == PLUS
11215 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11217 gcc_assert (GET_CODE (addr) == PLUS);
11219 if (GET_CODE (addr) == PLUS)
11221 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11223 /* Check first to see if this is a constant offset from a @GOTOFF
11224 symbol reference. */
11225 if (gotoff_operand (op0, Pmode)
11226 && CONST_INT_P (op1))
11228 if (!TARGET_64BIT)
11230 if (reload_in_progress)
11231 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11232 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11234 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11235 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11236 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11238 if (reg != 0)
11240 emit_move_insn (reg, new_rtx);
11241 new_rtx = reg;
11244 else
11246 if (INTVAL (op1) < -16*1024*1024
11247 || INTVAL (op1) >= 16*1024*1024)
11249 if (!x86_64_immediate_operand (op1, Pmode))
11250 op1 = force_reg (Pmode, op1);
11251 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11255 else
11257 base = legitimize_pic_address (XEXP (addr, 0), reg);
11258 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11259 base == reg ? NULL_RTX : reg);
11261 if (CONST_INT_P (new_rtx))
11262 new_rtx = plus_constant (base, INTVAL (new_rtx));
11263 else
11265 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11267 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11268 new_rtx = XEXP (new_rtx, 1);
11270 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11278 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11280 static rtx
11281 get_thread_pointer (int to_reg)
11283 rtx tp, reg, insn;
11285 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11286 if (!to_reg)
11287 return tp;
11289 reg = gen_reg_rtx (Pmode);
11290 insn = gen_rtx_SET (VOIDmode, reg, tp);
11291 insn = emit_insn (insn);
11293 return reg;
11296 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11297 false if we expect this to be used for a memory address and true if
11298 we expect to load the address into a register. */
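/* For example, with GNU TLS on ia32 a local-exec access expands to
       movl  %gs:0, %eax
       leal  x@ntpoff(%eax), %eax
   while initial-exec must first load the symbol's offset from the GOT
   (x@gottpoff) before adding it to the thread pointer.  */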
11300 static rtx
11301 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11303 rtx dest, base, off, pic, tp;
11305 switch (model)
11308 case TLS_MODEL_GLOBAL_DYNAMIC:
11309 dest = gen_reg_rtx (Pmode);
11310 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11312 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11314 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11316 start_sequence ();
11317 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11318 insns = get_insns ();
11319 end_sequence ();
11321 RTL_CONST_CALL_P (insns) = 1;
11322 emit_libcall_block (insns, dest, rax, x);
11324 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11325 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11326 else
11327 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11329 if (TARGET_GNU2_TLS)
11331 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11333 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11335 break;
11337 case TLS_MODEL_LOCAL_DYNAMIC:
11338 base = gen_reg_rtx (Pmode);
11339 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11341 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11343 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11345 start_sequence ();
11346 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11347 insns = get_insns ();
11348 end_sequence ();
11350 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11351 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11352 RTL_CONST_CALL_P (insns) = 1;
11353 emit_libcall_block (insns, base, rax, note);
11355 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11356 emit_insn (gen_tls_local_dynamic_base_64 (base));
11357 else
11358 emit_insn (gen_tls_local_dynamic_base_32 (base));
11360 if (TARGET_GNU2_TLS)
11362 rtx x = ix86_tls_module_base ();
11364 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11365 gen_rtx_MINUS (Pmode, x, tp));
11368 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11369 off = gen_rtx_CONST (Pmode, off);
11371 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11373 if (TARGET_GNU2_TLS)
11375 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11377 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11379 break;
11382 case TLS_MODEL_INITIAL_EXEC:
11384 if (TARGET_64BIT)
11385 pic = NULL;
11386 type = UNSPEC_GOTNTPOFF;
11388 else if (flag_pic)
11390 if (reload_in_progress)
11391 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11392 pic = pic_offset_table_rtx;
11393 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11395 else if (!TARGET_ANY_GNU_TLS)
11397 pic = gen_reg_rtx (Pmode);
11398 emit_insn (gen_set_got (pic));
11399 type = UNSPEC_GOTTPOFF;
11402 else
11403 pic = NULL;
11404 type = UNSPEC_INDNTPOFF;
11407 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11408 off = gen_rtx_CONST (Pmode, off);
11409 if (pic)
11410 off = gen_rtx_PLUS (Pmode, pic, off);
11411 off = gen_const_mem (Pmode, off);
11412 set_mem_alias_set (off, ix86_GOT_alias_set ());
11414 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11416 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11417 off = force_reg (Pmode, off);
11418 return gen_rtx_PLUS (Pmode, base, off);
11421 else
11422 base = get_thread_pointer (true);
11423 dest = gen_reg_rtx (Pmode);
11424 emit_insn (gen_subsi3 (dest, base, off));
11428 case TLS_MODEL_LOCAL_EXEC:
11429 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11430 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11431 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11432 off = gen_rtx_CONST (Pmode, off);
11434 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11436 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11437 return gen_rtx_PLUS (Pmode, base, off);
11440 else
11441 base = get_thread_pointer (true);
11442 dest = gen_reg_rtx (Pmode);
11443 emit_insn (gen_subsi3 (dest, base, off));
11445 break;
11447 default:
11448 gcc_unreachable ();
11451 return dest;
11454 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11455 to DECL. */
11457 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11458 htab_t dllimport_map;
11460 static tree
11461 get_dllimport_decl (tree decl)
11463 struct tree_map *h, in;
11464 void **loc;
11465 const char *name;
11466 const char *prefix;
11467 size_t namelen, prefixlen;
11468 char *imp_name;
11469 tree to;
11470 rtx rtl;
11472 if (!dllimport_map)
11473 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11475 in.hash = htab_hash_pointer (decl);
11476 in.base.from = decl;
11477 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11478 h = (struct tree_map *) *loc;
11479 if (h)
11480 return h->to;
11482 *loc = h = ggc_alloc_tree_map ();
11483 h->hash = in.hash;
11484 h->base.from = decl;
11485 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11486 VAR_DECL, NULL, ptr_type_node);
11487 DECL_ARTIFICIAL (to) = 1;
11488 DECL_IGNORED_P (to) = 1;
11489 DECL_EXTERNAL (to) = 1;
11490 TREE_READONLY (to) = 1;
11492 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11493 name = targetm.strip_name_encoding (name);
11494 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11495 ? "*__imp_" : "*__imp__";
11496 namelen = strlen (name);
11497 prefixlen = strlen (prefix);
11498 imp_name = (char *) alloca (namelen + prefixlen + 1);
11499 memcpy (imp_name, prefix, prefixlen);
11500 memcpy (imp_name + prefixlen, name, namelen + 1);
11502 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11503 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11504 SET_SYMBOL_REF_DECL (rtl, to);
11505 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11507 rtl = gen_const_mem (Pmode, rtl);
11508 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11510 SET_DECL_RTL (to, rtl);
11511 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11512 return to;
11516 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11517 true if we require the result be a register. */
11519 static rtx
11520 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11522 tree imp_decl;
11523 rtx x;
11525 gcc_assert (SYMBOL_REF_DECL (symbol));
11526 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11528 x = DECL_RTL (imp_decl);
11529 if (want_reg)
11530 x = force_reg (Pmode, x);
11531 return x;
11534 /* Try machine-dependent ways of modifying an illegitimate address
11535 to be legitimate. If we find one, return the new, valid address.
11536 This macro is used in only one place: `memory_address' in explow.c.
11538 OLDX is the address as it was before break_out_memory_refs was called.
11539 In some cases it is useful to look at this to decide what needs to be done.
11541 It is always safe for this macro to do nothing. It exists to recognize
11542 opportunities to optimize the output.
11544 For the 80386, we handle X+REG by loading X into a register R and
11545 using R+REG. R will go in a general reg and indexing will be used.
11546 However, if REG is a broken-out memory address or multiplication,
11547 nothing needs to be done because REG can certainly go in a general reg.
11549 When -fpic is used, special handling is needed for symbolic references.
11550 See comments by legitimize_pic_address in i386.c for details. */
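/* For example, register elimination can leave us with
   (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8))),
   which is re-associated below into
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))
   so that it matches the machine's lea addressing form.  */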
11552 static rtx
11553 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11554 enum machine_mode mode)
11559 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11560 if (log)
11561 return legitimize_tls_address (x, (enum tls_model) log, false);
11562 if (GET_CODE (x) == CONST
11563 && GET_CODE (XEXP (x, 0)) == PLUS
11564 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11565 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11567 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11568 (enum tls_model) log, false);
11569 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11572 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11574 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11575 return legitimize_dllimport_symbol (x, true);
11576 if (GET_CODE (x) == CONST
11577 && GET_CODE (XEXP (x, 0)) == PLUS
11578 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11579 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11581 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11582 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11586 if (flag_pic && SYMBOLIC_CONST (x))
11587 return legitimize_pic_address (x, 0);
11589 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11590 if (GET_CODE (x) == ASHIFT
11591 && CONST_INT_P (XEXP (x, 1))
11592 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11595 log = INTVAL (XEXP (x, 1));
11596 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11597 GEN_INT (1 << log));
11600 if (GET_CODE (x) == PLUS)
11602 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11604 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11605 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11606 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11609 log = INTVAL (XEXP (XEXP (x, 0), 1));
11610 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11611 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11612 GEN_INT (1 << log));
11615 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11616 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11617 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11620 log = INTVAL (XEXP (XEXP (x, 1), 1));
11621 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11622 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11623 GEN_INT (1 << log));
11626 /* Put multiply first if it isn't already. */
11627 if (GET_CODE (XEXP (x, 1)) == MULT)
11629 rtx tmp = XEXP (x, 0);
11630 XEXP (x, 0) = XEXP (x, 1);
11631 XEXP (x, 1) = tmp;
11632 changed = 1;
11635 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11636 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11637 created by virtual register instantiation, register elimination, and
11638 similar optimizations. */
11639 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11642 x = gen_rtx_PLUS (Pmode,
11643 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11644 XEXP (XEXP (x, 1), 0)),
11645 XEXP (XEXP (x, 1), 1));
11646 changed = 1;
11648 /* Canonicalize
11649 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11650 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11651 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11653 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11654 && CONSTANT_P (XEXP (x, 1)))
11656 rtx constant;
11657 rtx other = NULL_RTX;
11659 if (CONST_INT_P (XEXP (x, 1)))
11661 constant = XEXP (x, 1);
11662 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11664 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11666 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11667 other = XEXP (x, 1);
11669 else
11670 constant = 0;
11672 if (constant)
11674 changed = 1;
11675 x = gen_rtx_PLUS (Pmode,
11676 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11677 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11678 plus_constant (other, INTVAL (constant)));
11682 if (changed && ix86_legitimate_address_p (mode, x, false))
11683 return x;
11685 if (GET_CODE (XEXP (x, 0)) == MULT)
11687 changed = 1;
11688 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11691 if (GET_CODE (XEXP (x, 1)) == MULT)
11693 changed = 1;
11694 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11697 if (changed
11698 && REG_P (XEXP (x, 1))
11699 && REG_P (XEXP (x, 0)))
11700 return x;
11702 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11704 changed = 1;
11705 x = legitimize_pic_address (x, 0);
11708 if (changed && ix86_legitimate_address_p (mode, x, false))
11709 return x;
11711 if (REG_P (XEXP (x, 0)))
11713 rtx temp = gen_reg_rtx (Pmode);
11714 rtx val = force_operand (XEXP (x, 1), temp);
11715 if (val != temp)
11716 emit_move_insn (temp, val);
11718 XEXP (x, 1) = temp;
11719 return x;
11722 else if (REG_P (XEXP (x, 1)))
11724 rtx temp = gen_reg_rtx (Pmode);
11725 rtx val = force_operand (XEXP (x, 0), temp);
11726 if (val != temp)
11727 emit_move_insn (temp, val);
11729 XEXP (x, 0) = temp;
11730 return x;
11737 /* Print an integer constant expression in assembler syntax. Addition
11738 and subtraction are the only arithmetic that may appear in these
11739 expressions. FILE is the stdio stream to write to, X is the rtx, and
11740 CODE is the operand print code from the output string. */
11743 output_pic_addr_const (FILE *file, rtx x, int code)
11747 switch (GET_CODE (x))
11750 gcc_assert (flag_pic);
11755 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11756 output_addr_const (file, x);
11759 const char *name = XSTR (x, 0);
11761 /* Mark the decl as referenced so that cgraph will
11762 output the function. */
11763 if (SYMBOL_REF_DECL (x))
11764 mark_decl_referenced (SYMBOL_REF_DECL (x));
11767 if (MACHOPIC_INDIRECT
11768 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11769 name = machopic_indirection_name (x, /*stub_p=*/true);
11771 assemble_name (file, name);
11773 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11774 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11775 fputs ("@PLT", file);
11782 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11783 assemble_name (asm_out_file, buf);
11787 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11791 /* This used to output parentheses around the expression,
11792 but that does not work on the 386 (either ATT or BSD assembler). */
11793 output_pic_addr_const (file, XEXP (x, 0), code);
11797 if (GET_MODE (x) == VOIDmode)
11799 /* We can use %d if the number is <32 bits and positive. */
11800 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11801 fprintf (file, "0x%lx%08lx",
11802 (unsigned long) CONST_DOUBLE_HIGH (x),
11803 (unsigned long) CONST_DOUBLE_LOW (x));
11805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11808 /* We can't handle floating point constants;
11809 TARGET_PRINT_OPERAND must handle them. */
11810 output_operand_lossage ("floating constant misused");
11814 /* Some assemblers need integer constants to appear first. */
11815 if (CONST_INT_P (XEXP (x, 0)))
11817 output_pic_addr_const (file, XEXP (x, 0), code);
11818 putc ('+', file);
11819 output_pic_addr_const (file, XEXP (x, 1), code);
11822 else
11823 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11824 output_pic_addr_const (file, XEXP (x, 1), code);
11825 putc ('+', file);
11826 output_pic_addr_const (file, XEXP (x, 0), code);
11832 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11833 output_pic_addr_const (file, XEXP (x, 0), code);
11834 putc ('-', file);
11835 output_pic_addr_const (file, XEXP (x, 1), code);
11837 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11841 gcc_assert (XVECLEN (x, 0) == 1);
11842 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11843 switch (XINT (x, 1))
11846 fputs ("@GOT", file);
11848 case UNSPEC_GOTOFF:
11849 fputs ("@GOTOFF", file);
11851 case UNSPEC_PLTOFF:
11852 fputs ("@PLTOFF", file);
11854 case UNSPEC_GOTPCREL:
11855 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11856 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11858 case UNSPEC_GOTTPOFF:
11859 /* FIXME: This might be @TPOFF in Sun ld too. */
11860 fputs ("@gottpoff", file);
11863 fputs ("@tpoff", file);
11865 case UNSPEC_NTPOFF:
11867 fputs ("@tpoff", file);
11869 fputs ("@ntpoff", file);
11871 case UNSPEC_DTPOFF:
11872 fputs ("@dtpoff", file);
11874 case UNSPEC_GOTNTPOFF:
11876 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11877 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11879 fputs ("@gotntpoff", file);
11881 case UNSPEC_INDNTPOFF:
11882 fputs ("@indntpoff", file);
11885 case UNSPEC_MACHOPIC_OFFSET:
11887 machopic_output_function_base_name (file);
11891 output_operand_lossage ("invalid UNSPEC as operand");
11897 output_operand_lossage ("invalid expression as operand");
11901 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11902 We need to emit DTP-relative relocations. */
11904 static void ATTRIBUTE_UNUSED
11905 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11907 fputs (ASM_LONG, file);
11908 output_addr_const (file, x);
11909 fputs ("@dtpoff", file);
11915 fputs (", 0", file);
11918 gcc_unreachable ();
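/* For example, for SIZE == 4 this emits
       .long  foo@dtpoff
   i.e. the offset of foo within its module's TLS block.  */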
11922 /* Return true if X is a representation of the PIC register. This copes
11923 with calls from ix86_find_base_term, where the register might have
11924 been replaced by a cselib value. */
11927 ix86_pic_register_p (rtx x)
11929 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11930 return (pic_offset_table_rtx
11931 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11933 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11936 /* In the name of slightly smaller debug output, and to cater to
11937 general assembler lossage, recognize PIC+GOTOFF and turn it back
11938 into a direct symbol reference.
11940 On Darwin, this is necessary to avoid a crash, because Darwin
11941 has a different PIC label for each routine but the DWARF debugging
11942 information is not associated with any particular routine, so it's
11943 necessary to remove references to the PIC label from RTL stored by
11944 the DWARF output code. */
11947 ix86_delegitimize_address (rtx x)
11949 rtx orig_x = delegitimize_mem_from_attrs (x);
11950 /* addend is NULL or some rtx if x is something+GOTOFF where
11951 something doesn't include the PIC register. */
11952 rtx addend = NULL_RTX;
11953 /* reg_addend is NULL or a multiple of some register. */
11954 rtx reg_addend = NULL_RTX;
11955 /* const_addend is NULL or a const_int. */
11956 rtx const_addend = NULL_RTX;
11957 /* This is the result, or NULL. */
11958 rtx result = NULL_RTX;
11967 if (GET_CODE (x) != CONST
11968 || GET_CODE (XEXP (x, 0)) != UNSPEC
11969 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11970 || !MEM_P (orig_x))
11971 return orig_x;
11972 x = XVECEXP (XEXP (x, 0), 0, 0);
11973 if (GET_MODE (orig_x) != Pmode)
11974 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11975 return x;
11978 if (GET_CODE (x) != PLUS
11979 || GET_CODE (XEXP (x, 1)) != CONST)
11982 if (ix86_pic_register_p (XEXP (x, 0)))
11983 /* %ebx + GOT/GOTOFF */
11984 ;
11985 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11987 /* %ebx + %reg * scale + GOT/GOTOFF */
11988 reg_addend = XEXP (x, 0);
11989 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11990 reg_addend = XEXP (reg_addend, 1);
11991 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11992 reg_addend = XEXP (reg_addend, 0);
11994 else
11995 reg_addend = NULL_RTX;
11996 addend = XEXP (x, 0);
11999 else
12000 addend = XEXP (x, 0);
12002 x = XEXP (XEXP (x, 1), 0);
12003 if (GET_CODE (x) == PLUS
12004 && CONST_INT_P (XEXP (x, 1)))
12006 const_addend = XEXP (x, 1);
12007 x = XEXP (x, 0);
12010 if (GET_CODE (x) == UNSPEC
12011 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12012 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12013 result = XVECEXP (x, 0, 0);
12015 if (TARGET_MACHO && darwin_local_data_pic (x)
12016 && !MEM_P (orig_x))
12017 result = XVECEXP (x, 0, 0);
12019 if (!result)
12020 return orig_x;
12022 if (const_addend)
12023 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12024 if (reg_addend)
12025 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12027 if (addend)
12028 /* If the rest of original X doesn't involve the PIC register, add
12029 addend and subtract pic_offset_table_rtx. This can happen e.g.
12031 leal (%ebx, %ecx, 4), %ecx
12033 movl foo@GOTOFF(%ecx), %edx
12034 in which case we return (%ecx - %ebx) + foo. */
12035 if (pic_offset_table_rtx)
12036 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12037 pic_offset_table_rtx),
12038 result);
12039 else
12040 return orig_x;
12042 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12043 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12047 /* If X is a machine specific address (i.e. a symbol or label being
12048 referenced as a displacement from the GOT implemented using an
12049 UNSPEC), then return the base term. Otherwise return X. */
12051 static rtx
12052 ix86_find_base_term (rtx x)
12054 rtx term;
12056 if (TARGET_64BIT)
12058 if (GET_CODE (x) != CONST)
12059 return x;
12060 term = XEXP (x, 0);
12061 if (GET_CODE (term) == PLUS
12062 && (CONST_INT_P (XEXP (term, 1))
12063 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12064 term = XEXP (term, 0);
12065 if (GET_CODE (term) != UNSPEC
12066 || XINT (term, 1) != UNSPEC_GOTPCREL)
12067 return x;
12069 return XVECEXP (term, 0, 0);
12072 return ix86_delegitimize_address (x);
12075 static void
12076 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12077 int fp, FILE *file)
12079 const char *suffix;
12081 if (mode == CCFPmode || mode == CCFPUmode)
12083 code = ix86_fp_compare_code_to_integer (code);
12084 mode = CCmode;
12086 if (reverse)
12087 code = reverse_condition (code);
12138 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12142 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12143 Those same assemblers have the same but opposite lossage on cmov. */
12144 if (mode == CCmode)
12145 suffix = fp ? "nbe" : "a";
12146 else if (mode == CCCmode)
12147 suffix = "b";
12148 else
12149 gcc_unreachable ();
12165 gcc_unreachable ();
12169 gcc_assert (mode == CCmode || mode == CCCmode);
12170 suffix = "b";
12186 gcc_unreachable ();
12190 /* ??? As above. */
12191 gcc_assert (mode == CCmode || mode == CCCmode);
12192 suffix = fp ? "nb" : "ae";
12195 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12199 /* ??? As above. */
12200 if (mode == CCmode)
12202 else if (mode == CCCmode)
12203 suffix = fp ? "nb" : "ae";
12205 gcc_unreachable ();
12208 suffix = fp ? "u" : "p";
12211 suffix = fp ? "nu" : "np";
12214 gcc_unreachable ();
12216 fputs (suffix, file);
12219 /* Print the name of register X to FILE based on its machine mode and number.
12220 If CODE is 'w', pretend the mode is HImode.
12221 If CODE is 'b', pretend the mode is QImode.
12222 If CODE is 'k', pretend the mode is SImode.
12223 If CODE is 'q', pretend the mode is DImode.
12224 If CODE is 'x', pretend the mode is V4SFmode.
12225 If CODE is 't', pretend the mode is V8SFmode.
12226 If CODE is 'h', pretend the reg is the 'high' byte register.
12227 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12228 If CODE is 'd', duplicate the operand for AVX instruction.
12232 print_reg (rtx x, int code, FILE *file)
12235 bool duplicated = code == 'd' && TARGET_AVX;
12237 gcc_assert (x == pc_rtx
12238 || (REGNO (x) != ARG_POINTER_REGNUM
12239 && REGNO (x) != FRAME_POINTER_REGNUM
12240 && REGNO (x) != FLAGS_REG
12241 && REGNO (x) != FPSR_REG
12242 && REGNO (x) != FPCR_REG));
12244 if (ASSEMBLER_DIALECT == ASM_ATT)
12245 putc ('%', file);
12247 if (x == pc_rtx)
12249 gcc_assert (TARGET_64BIT);
12250 fputs ("rip", file);
12254 if (code == 'w' || MMX_REG_P (x))
12255 code = 2;
12256 else if (code == 'b')
12257 code = 1;
12258 else if (code == 'k')
12259 code = 4;
12260 else if (code == 'q')
12261 code = 8;
12262 else if (code == 'y')
12263 code = 3;
12264 else if (code == 'h')
12265 code = 0;
12266 else if (code == 'x')
12267 code = 16;
12268 else if (code == 't')
12269 code = 32;
12270 else
12271 code = GET_MODE_SIZE (GET_MODE (x));
12273 /* Irritatingly, AMD extended registers use a different naming convention
12274 from the normal registers. */
12275 if (REX_INT_REG_P (x))
12277 gcc_assert (TARGET_64BIT);
12281 error ("extended registers have no high halves");
12284 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12287 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12290 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12293 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12296 error ("unsupported operand size for extended register");
12306 if (STACK_TOP_P (x))
12315 if (! ANY_FP_REG_P (x))
12316 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12321 reg = hi_reg_name[REGNO (x)];
12324 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12325 goto normal;
12326 reg = qi_reg_name[REGNO (x)];
12329 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12330 goto normal;
12331 reg = qi_high_reg_name[REGNO (x)];
12336 gcc_assert (!duplicated);
12338 fputs (hi_reg_name[REGNO (x)] + 1, file);
12342 default:
12343 gcc_unreachable ();
12349 if (ASSEMBLER_DIALECT == ASM_ATT)
12350 fprintf (file, ", %%%s", reg);
12352 fprintf (file, ", %s", reg);
12356 /* Locate some local-dynamic symbol still in use by this function
12357 so that we can print its name in some tls_local_dynamic_base
12358 pattern. */
12360 static int
12361 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12363 rtx x = *px;
12365 if (GET_CODE (x) == SYMBOL_REF
12366 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12368 cfun->machine->some_ld_name = XSTR (x, 0);
12369 return 1;
12372 return 0;
12375 static const char *
12376 get_some_local_dynamic_name (void)
12378 rtx insn;
12380 if (cfun->machine->some_ld_name)
12381 return cfun->machine->some_ld_name;
12383 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12384 if (NONDEBUG_INSN_P (insn)
12385 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12386 return cfun->machine->some_ld_name;
12391 /* Meaning of CODE:
12392 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12393 C -- print opcode suffix for set/cmov insn.
12394 c -- like C, but print reversed condition
12395 F,f -- likewise, but for floating-point.
12396 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12397 otherwise nothing
12398 R -- print the prefix for register names.
12399 z -- print the opcode suffix for the size of the current operand.
12400 Z -- likewise, with special suffixes for x87 instructions.
12401 * -- print a star (in certain assembler syntax)
12402 A -- print an absolute memory reference.
12403 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12404 s -- print a shift double count, followed by the assembler's argument
12405 delimiter.
12406 b -- print the QImode name of the register for the indicated operand.
12407 %b0 would print %al if operands[0] is reg 0.
12408 w -- likewise, print the HImode name of the register.
12409 k -- likewise, print the SImode name of the register.
12410 q -- likewise, print the DImode name of the register.
12411 x -- likewise, print the V4SFmode name of the register.
12412 t -- likewise, print the V8SFmode name of the register.
12413 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12414 y -- print "st(0)" instead of "st" as a register.
12415 d -- print duplicated register operand for AVX instruction.
12416 D -- print condition for SSE cmp instruction.
12417 P -- if PIC, print an @PLT suffix.
12418 X -- don't print any sort of PIC '@' suffix for a symbol.
12419 & -- print some in-use local-dynamic symbol name.
12420 H -- print a memory address offset by 8; used for sse high-parts
12421 Y -- print condition for XOP pcom* instruction.
12422 + -- print a branch hint as 'cs' or 'ds' prefix
12423 ; -- print a semicolon (after prefixes due to bug in older gas).
12424 @ -- print a segment register of thread base pointer load
12427 static void
12428 ix86_print_operand (FILE *file, rtx x, int code)
12431 switch (code)
12433 case '*':
12435 if (ASSEMBLER_DIALECT == ASM_ATT)
12436 putc ('*', file);
12437 return;
12441 const char *name = get_some_local_dynamic_name ();
12443 output_operand_lossage ("'%%&' used without any "
12444 "local dynamic TLS references");
12445 else
12446 assemble_name (file, name);
12447 return;
12451 switch (ASSEMBLER_DIALECT)
12453 case ASM_ATT:
12454 putc ('*', file);
12455 break;
12457 case ASM_INTEL:
12458 /* Intel syntax. For absolute addresses, registers should not
12459 be surrounded by braces. */
12461 if (!REG_P (x))
12462 putc ('[', file);
12463 ix86_print_operand (file, x, 0);
12464 putc (']', file);
12465 return;
12467 break;
12469 default:
12470 gcc_unreachable ();
12473 ix86_print_operand (file, x, 0);
12474 return;
12478 if (ASSEMBLER_DIALECT == ASM_ATT)
12483 if (ASSEMBLER_DIALECT == ASM_ATT)
12488 if (ASSEMBLER_DIALECT == ASM_ATT)
12493 if (ASSEMBLER_DIALECT == ASM_ATT)
12498 if (ASSEMBLER_DIALECT == ASM_ATT)
12503 if (ASSEMBLER_DIALECT == ASM_ATT)
12508 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12510 /* Opcodes don't get size suffixes if using Intel opcodes. */
12511 if (ASSEMBLER_DIALECT == ASM_INTEL)
12514 switch (GET_MODE_SIZE (GET_MODE (x)))
12533 output_operand_lossage
12534 ("invalid operand size for operand code '%c'", code);
12539 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12541 (0, "non-integer operand used with operand code '%c'", code);
12545 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12546 if (ASSEMBLER_DIALECT == ASM_INTEL)
12549 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12551 switch (GET_MODE_SIZE (GET_MODE (x)))
12554 #ifdef HAVE_AS_IX86_FILDS
12564 #ifdef HAVE_AS_IX86_FILDQ
12567 fputs ("ll", file);
12575 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12577 /* 387 opcodes don't get size suffixes
12578 if the operands are registers. */
12579 if (STACK_REG_P (x))
12582 switch (GET_MODE_SIZE (GET_MODE (x)))
12603 output_operand_lossage
12604 ("invalid operand type used with operand code '%c'", code);
12608 output_operand_lossage
12609 ("invalid operand size for operand code '%c'", code);
12626 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12628 ix86_print_operand (file, x, 0);
12629 fputs (", ", file);
12634 /* Little bit of braindamage here.  The SSE compare instructions
12635 use completely different names for the comparisons than the
12636 fp conditional moves do. */
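/* For example, an EQ comparison prints here as "eq", so a machine
   description template along the lines of "cmp%D5ps" (illustrative, not
   quoted from i386.md) expands to "cmpeqps"; the fcmov-style condition
   names (e, ne, b, ...) are not used for this code. */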
12639 switch (GET_CODE (x))
12642 fputs ("eq", file);
12645 fputs ("eq_us", file);
12648 fputs ("lt", file);
12651 fputs ("nge", file);
12654 fputs ("le", file);
12657 fputs ("ngt", file);
12660 fputs ("unord", file);
12663 fputs ("neq", file);
12666 fputs ("neq_oq", file);
12669 fputs ("ge", file);
12672 fputs ("nlt", file);
12675 fputs ("gt", file);
12678 fputs ("nle", file);
12681 fputs ("ord", file);
12684 output_operand_lossage ("operand is not a condition code, "
12685 "invalid operand code 'D'");
12691 switch (GET_CODE (x))
12695 fputs ("eq", file);
12699 fputs ("lt", file);
12703 fputs ("le", file);
12706 fputs ("unord", file);
12710 fputs ("neq", file);
12714 fputs ("nlt", file);
12718 fputs ("nle", file);
12721 fputs ("ord", file);
12724 output_operand_lossage ("operand is not a condition code, "
12725 "invalid operand code 'D'");
12731 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12732 if (ASSEMBLER_DIALECT == ASM_ATT)
12734 switch (GET_MODE (x))
12736 case HImode: putc ('w', file); break;
12738 case SFmode: putc ('l', file); break;
12740 case DFmode: putc ('q', file); break;
12741 default: gcc_unreachable ();
12748 if (!COMPARISON_P (x))
12750 output_operand_lossage ("operand is neither a constant nor a "
12751 "condition code, invalid operand code "
12755 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12758 if (!COMPARISON_P (x))
12760 output_operand_lossage ("operand is neither a constant nor a "
12761 "condition code, invalid operand code "
12765 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12766 if (ASSEMBLER_DIALECT == ASM_ATT)
12769 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12772 /* Like above, but reverse condition */
12774 /* Check to see if argument to %c is really a constant
12775 and not a condition code which needs to be reversed. */
12776 if (!COMPARISON_P (x))
12778 output_operand_lossage ("operand is neither a constant nor a "
12779 "condition code, invalid operand "
12783 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12786 if (!COMPARISON_P (x))
12788 output_operand_lossage ("operand is neither a constant nor a "
12789 "condition code, invalid operand "
12793 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12794 if (ASSEMBLER_DIALECT == ASM_ATT)
12797 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12801 /* It doesn't actually matter what mode we use here, as we're
12802 only going to use this for printing. */
12803 x = adjust_address_nv (x, DImode, 8);
12811 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12814 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12817 int pred_val = INTVAL (XEXP (x, 0));
12819 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12820 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12822 int taken = pred_val > REG_BR_PROB_BASE / 2;
12823 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12825 /* Emit hints only when the default branch prediction
12826 heuristics would fail. */
12827 if (taken != cputaken)
12829 /* We use 3e (DS) prefix for taken branches and
12830 2e (CS) prefix for not taken branches. */
12832 fputs ("ds ; ", file);
12834 fputs ("cs ; ", file);
12842 switch (GET_CODE (x))
12845 fputs ("neq", file);
12848 fputs ("eq", file);
12852 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12856 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12860 fputs ("le", file);
12864 fputs ("lt", file);
12867 fputs ("unord", file);
12870 fputs ("ord", file);
12873 fputs ("ueq", file);
12876 fputs ("nlt", file);
12879 fputs ("nle", file);
12882 fputs ("ule", file);
12885 fputs ("ult", file);
12888 fputs ("une", file);
12891 output_operand_lossage ("operand is not a condition code, "
12892 "invalid operand code 'Y'");
12898 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12904 if (ASSEMBLER_DIALECT == ASM_ATT)
12907 /* The kernel uses a different segment register for performance
12908 reasons; a system call would not have to trash the userspace
12909 segment register, which would be expensive. */
12910 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
12911 fputs ("fs", file);
12913 fputs ("gs", file);
12917 output_operand_lossage ("invalid operand code '%c'", code);
12922 print_reg (x, code, file);
12924 else if (MEM_P (x))
12926 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12927 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12928 && GET_MODE (x) != BLKmode)
12931 switch (GET_MODE_SIZE (GET_MODE (x)))
12933 case 1: size = "BYTE"; break;
12934 case 2: size = "WORD"; break;
12935 case 4: size = "DWORD"; break;
12936 case 8: size = "QWORD"; break;
12937 case 12: size = "TBYTE"; break;
12939 if (GET_MODE (x) == XFmode)
12944 case 32: size = "YMMWORD"; break;
12946 gcc_unreachable ();
12949 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12952 else if (code == 'w')
12954 else if (code == 'k')
12957 fputs (size, file);
12958 fputs (" PTR ", file);
12962 /* Avoid (%rip) for call operands. */
12963 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12964 && !CONST_INT_P (x))
12965 output_addr_const (file, x);
12966 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12967 output_operand_lossage ("invalid constraints for operand");
12969 output_address (x);
12972 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12977 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12978 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12980 if (ASSEMBLER_DIALECT == ASM_ATT)
12982 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12984 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
12986 fprintf (file, "0x%08x", (unsigned int) l);
12989 /* These float cases don't actually occur as immediate operands. */
12990 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12994 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12995 fputs (dstr, file);
12998 else if (GET_CODE (x) == CONST_DOUBLE
12999 && GET_MODE (x) == XFmode)
13003 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13004 fputs (dstr, file);
13009 /* We have patterns that allow zero sets of memory, for instance.
13010 In 64-bit mode, we should probably support all 8-byte vectors,
13011 since we can in fact encode that into an immediate. */
13012 if (GET_CODE (x) == CONST_VECTOR)
13014 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13020 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13022 if (ASSEMBLER_DIALECT == ASM_ATT)
13025 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13026 || GET_CODE (x) == LABEL_REF)
13028 if (ASSEMBLER_DIALECT == ASM_ATT)
13031 fputs ("OFFSET FLAT:", file);
13034 if (CONST_INT_P (x))
13035 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13037 output_pic_addr_const (file, x, code);
13039 output_addr_const (file, x);
13044 ix86_print_operand_punct_valid_p (unsigned char code)
13046 return (code == '@' || code == '*' || code == '+'
13047 || code == '&' || code == ';');
13050 /* Print a memory operand whose address is ADDR. */
13053 ix86_print_operand_address (FILE *file, rtx addr)
13055 struct ix86_address parts;
13056 rtx base, index, disp;
13058 int ok = ix86_decompose_address (addr, &parts);
13063 index = parts.index;
13065 scale = parts.scale;
13073 if (ASSEMBLER_DIALECT == ASM_ATT)
13075 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13078 gcc_unreachable ();
13081 /* Use the one-byte-shorter RIP-relative addressing in 64bit mode. */
13082 if (TARGET_64BIT && !base && !index)
13086 if (GET_CODE (disp) == CONST
13087 && GET_CODE (XEXP (disp, 0)) == PLUS
13088 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13089 symbol = XEXP (XEXP (disp, 0), 0);
13091 if (GET_CODE (symbol) == LABEL_REF
13092 || (GET_CODE (symbol) == SYMBOL_REF
13093 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13096 if (!base && !index)
13098 /* A displacement-only address requires special attention. */
13100 if (CONST_INT_P (disp))
13102 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13103 fputs ("ds:", file);
13104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13107 output_pic_addr_const (file, disp, 0);
13109 output_addr_const (file, disp);
13113 if (ASSEMBLER_DIALECT == ASM_ATT)
13118 output_pic_addr_const (file, disp, 0);
13119 else if (GET_CODE (disp) == LABEL_REF)
13120 output_asm_label (disp);
13122 output_addr_const (file, disp);
13127 print_reg (base, 0, file);
13131 print_reg (index, 0, file);
13133 fprintf (file, ",%d", scale);
13139 rtx offset = NULL_RTX;
13143 /* Pull out the offset of a symbol; print any symbol itself. */
13144 if (GET_CODE (disp) == CONST
13145 && GET_CODE (XEXP (disp, 0)) == PLUS
13146 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13148 offset = XEXP (XEXP (disp, 0), 1);
13149 disp = gen_rtx_CONST (VOIDmode,
13150 XEXP (XEXP (disp, 0), 0));
13154 output_pic_addr_const (file, disp, 0);
13155 else if (GET_CODE (disp) == LABEL_REF)
13156 output_asm_label (disp);
13157 else if (CONST_INT_P (disp))
13160 output_addr_const (file, disp);
13166 print_reg (base, 0, file);
13169 if (INTVAL (offset) >= 0)
13171 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13175 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13182 print_reg (index, 0, file);
13184 fprintf (file, "*%d", scale);
13191 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13194 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13198 if (GET_CODE (x) != UNSPEC)
13201 op = XVECEXP (x, 0, 0);
13202 switch (XINT (x, 1))
13204 case UNSPEC_GOTTPOFF:
13205 output_addr_const (file, op);
13206 /* FIXME: This might be @TPOFF in Sun ld. */
13207 fputs ("@gottpoff", file);
13210 output_addr_const (file, op);
13211 fputs ("@tpoff", file);
13213 case UNSPEC_NTPOFF:
13214 output_addr_const (file, op);
13216 fputs ("@tpoff", file);
13218 fputs ("@ntpoff", file);
13220 case UNSPEC_DTPOFF:
13221 output_addr_const (file, op);
13222 fputs ("@dtpoff", file);
13224 case UNSPEC_GOTNTPOFF:
13225 output_addr_const (file, op);
13227 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13228 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13230 fputs ("@gotntpoff", file);
13232 case UNSPEC_INDNTPOFF:
13233 output_addr_const (file, op);
13234 fputs ("@indntpoff", file);
13237 case UNSPEC_MACHOPIC_OFFSET:
13238 output_addr_const (file, op);
13240 machopic_output_function_base_name (file);
13251 /* Split one or more DImode RTL references into pairs of SImode
13252 references. The RTL can be REG, offsettable MEM, integer constant, or
13253 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13254 split and "num" is its length. lo_half and hi_half are output arrays
13255 that parallel "operands". */
13258 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13262 rtx op = operands[num];
13264 /* simplify_subreg refuses to split volatile memory addresses,
13265 but we still have to handle them. */
13268 lo_half[num] = adjust_address (op, SImode, 0);
13269 hi_half[num] = adjust_address (op, SImode, 4);
13273 lo_half[num] = simplify_gen_subreg (SImode, op,
13274 GET_MODE (op) == VOIDmode
13275 ? DImode : GET_MODE (op), 0);
13276 hi_half[num] = simplify_gen_subreg (SImode, op,
13277 GET_MODE (op) == VOIDmode
13278 ? DImode : GET_MODE (op), 4);
13282 /* Split one or more TImode RTL references into pairs of DImode
13283 references. The RTL can be REG, offsettable MEM, integer constant, or
13284 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
13285 split and "num" is its length. lo_half and hi_half are output arrays
13286 that parallel "operands". */
13289 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13293 rtx op = operands[num];
13295 /* simplify_subreg refuses to split volatile memory addresses, but we
13296 still have to handle them. */
13299 lo_half[num] = adjust_address (op, DImode, 0);
13300 hi_half[num] = adjust_address (op, DImode, 8);
13304 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13305 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13310 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13311 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13312 is the expression of the binary operation. The output may either be
13313 emitted here, or returned to the caller, like all output_* functions.
13315 There is no guarantee that the operands are the same mode, as they
13316 might be within FLOAT or FLOAT_EXTEND expressions. */
13318 #ifndef SYSV386_COMPAT
13319 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13320 wants to fix the assemblers because that causes incompatibility
13321 with gcc. No-one wants to fix gcc because that causes
13322 incompatibility with assemblers... You can use the option of
13323 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13324 #define SYSV386_COMPAT 1
13328 output_387_binary_op (rtx insn, rtx *operands)
13330 static char buf[40];
13333 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13335 #ifdef ENABLE_CHECKING
13336 /* Even if we do not want to check the inputs, this documents input
13337 constraints, which helps in understanding the following code. */
13338 if (STACK_REG_P (operands[0])
13339 && ((REG_P (operands[1])
13340 && REGNO (operands[0]) == REGNO (operands[1])
13341 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13342 || (REG_P (operands[2])
13343 && REGNO (operands[0]) == REGNO (operands[2])
13344 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13345 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13348 gcc_assert (is_sse);
13351 switch (GET_CODE (operands[3]))
13354 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13355 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13363 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13364 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13372 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13373 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13381 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13382 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13390 gcc_unreachable ();
13397 strcpy (buf, ssep);
13398 if (GET_MODE (operands[0]) == SFmode)
13399 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13401 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13405 strcpy (buf, ssep + 1);
13406 if (GET_MODE (operands[0]) == SFmode)
13407 strcat (buf, "ss\t{%2, %0|%0, %2}");
13409 strcat (buf, "sd\t{%2, %0|%0, %2}");
13415 switch (GET_CODE (operands[3]))
13419 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13421 rtx temp = operands[2];
13422 operands[2] = operands[1];
13423 operands[1] = temp;
13426 /* We now know operands[0] == operands[1]. */
13428 if (MEM_P (operands[2]))
13434 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13436 if (STACK_TOP_P (operands[0]))
13437 /* How is it that we are storing to a dead operand[2]?
13438 Well, presumably operands[1] is dead too. We can't
13439 store the result to st(0) as st(0) gets popped on this
13440 instruction. Instead store to operands[2] (which I
13441 think has to be st(1)). st(1) will be popped later.
13442 gcc <= 2.8.1 didn't have this check and generated
13443 assembly code that the Unixware assembler rejected. */
13444 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13446 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13450 if (STACK_TOP_P (operands[0]))
13451 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13453 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13458 if (MEM_P (operands[1]))
13464 if (MEM_P (operands[2]))
13470 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13473 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13474 derived assemblers, confusingly reverse the direction of
13475 the operation for fsub{r} and fdiv{r} when the
13476 destination register is not st(0). The Intel assembler
13477 doesn't have this brain damage. Read !SYSV386_COMPAT to
13478 figure out what the hardware really does. */
13479 if (STACK_TOP_P (operands[0]))
13480 p = "{p\t%0, %2|rp\t%2, %0}";
13482 p = "{rp\t%2, %0|p\t%0, %2}";
13484 if (STACK_TOP_P (operands[0]))
13485 /* As above for fmul/fadd, we can't store to st(0). */
13486 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13488 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13493 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13496 if (STACK_TOP_P (operands[0]))
13497 p = "{rp\t%0, %1|p\t%1, %0}";
13499 p = "{p\t%1, %0|rp\t%0, %1}";
13501 if (STACK_TOP_P (operands[0]))
13502 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13504 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13509 if (STACK_TOP_P (operands[0]))
13511 if (STACK_TOP_P (operands[1]))
13512 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13514 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13517 else if (STACK_TOP_P (operands[1]))
13520 p = "{\t%1, %0|r\t%0, %1}";
13522 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13528 p = "{r\t%2, %0|\t%0, %2}";
13530 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13536 gcc_unreachable ();
13543 /* Return needed mode for entity in optimize_mode_switching pass. */
13546 ix86_mode_needed (int entity, rtx insn)
13548 enum attr_i387_cw mode;
13550 /* The mode UNINITIALIZED is used to store the control word after a
13551 function call or ASM pattern.  The mode ANY specifies that the function
13552 has no requirements on the control word and makes no changes to the
13553 bits we are interested in. */
13556 || (NONJUMP_INSN_P (insn)
13557 && (asm_noperands (PATTERN (insn)) >= 0
13558 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13559 return I387_CW_UNINITIALIZED;
13561 if (recog_memoized (insn) < 0)
13562 return I387_CW_ANY;
13564 mode = get_attr_i387_cw (insn);
13569 if (mode == I387_CW_TRUNC)
13574 if (mode == I387_CW_FLOOR)
13579 if (mode == I387_CW_CEIL)
13584 if (mode == I387_CW_MASK_PM)
13589 gcc_unreachable ();
13592 return I387_CW_ANY;
13595 /* Output code to initialize control word copies used by trunc?f?i and
13596 rounding patterns.  CURRENT_MODE is set to the current control word,
13597 while NEW_MODE is set to the new control word. */
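/* For reference: the x87 rounding-control field is bits 10-11 of the
   control word, with 00 = round to nearest, 01 = round down, 10 = round
   up and 11 = truncate.  That is why the code below masks with ~0x0c00
   and ORs in 0x0400 (FLOOR), 0x0800 (CEIL) or 0x0c00 (TRUNC). */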
13600 emit_i387_cw_initialization (int mode)
13602 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13605 enum ix86_stack_slot slot;
13607 rtx reg = gen_reg_rtx (HImode);
13609 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13610 emit_move_insn (reg, copy_rtx (stored_mode));
13612 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13613 || optimize_function_for_size_p (cfun))
13617 case I387_CW_TRUNC:
13618 /* round toward zero (truncate) */
13619 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13620 slot = SLOT_CW_TRUNC;
13623 case I387_CW_FLOOR:
13624 /* round down toward -oo */
13625 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13626 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13627 slot = SLOT_CW_FLOOR;
13631 /* round up toward +oo */
13632 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13633 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13634 slot = SLOT_CW_CEIL;
13637 case I387_CW_MASK_PM:
13638 /* mask precision exception for nearbyint() */
13639 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13640 slot = SLOT_CW_MASK_PM;
13644 gcc_unreachable ();
13651 case I387_CW_TRUNC:
13652 /* round toward zero (truncate) */
13653 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13654 slot = SLOT_CW_TRUNC;
13657 case I387_CW_FLOOR:
13658 /* round down toward -oo */
13659 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13660 slot = SLOT_CW_FLOOR;
13664 /* round up toward +oo */
13665 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13666 slot = SLOT_CW_CEIL;
13669 case I387_CW_MASK_PM:
13670 /* mask precision exception for nearbyint() */
13671 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13672 slot = SLOT_CW_MASK_PM;
13676 gcc_unreachable ();
13680 gcc_assert (slot < MAX_386_STACK_LOCALS);
13682 new_mode = assign_386_stack_local (HImode, slot);
13683 emit_move_insn (new_mode, reg);
13686 /* Output code for INSN to convert a float to a signed int. OPERANDS
13687 are the insn operands. The output may be [HSD]Imode and the input
13688 operand may be [SDX]Fmode. */
13691 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13693 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13694 int dimode_p = GET_MODE (operands[0]) == DImode;
13695 int round_mode = get_attr_i387_cw (insn);
13697 /* Jump through a hoop or two for DImode, since the hardware has no
13698 non-popping instruction. We used to do this a different way, but
13699 that was somewhat fragile and broke with post-reload splitters. */
13700 if ((dimode_p || fisttp) && !stack_top_dies)
13701 output_asm_insn ("fld\t%y1", operands);
13703 gcc_assert (STACK_TOP_P (operands[1]));
13704 gcc_assert (MEM_P (operands[0]));
13705 gcc_assert (GET_MODE (operands[1]) != TFmode);
13708 output_asm_insn ("fisttp%Z0\t%0", operands);
13711 if (round_mode != I387_CW_ANY)
13712 output_asm_insn ("fldcw\t%3", operands);
13713 if (stack_top_dies || dimode_p)
13714 output_asm_insn ("fistp%Z0\t%0", operands);
13716 output_asm_insn ("fist%Z0\t%0", operands);
13717 if (round_mode != I387_CW_ANY)
13718 output_asm_insn ("fldcw\t%2", operands);
13724 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13725 have the values zero or one, indicates the ffreep insn's operand
13726 from the OPERANDS array. */
13728 static const char *
13729 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13731 if (TARGET_USE_FFREEP)
13732 #ifdef HAVE_AS_IX86_FFREEP
13733 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13736 static char retval[32];
13737 int regno = REGNO (operands[opno]);
13739 gcc_assert (FP_REGNO_P (regno));
13741 regno -= FIRST_STACK_REG;
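/* ffreep %st(N) encodes as the two bytes 0xdf, 0xc0+N.  Emitting the
   16-bit word 0xcNdf (little endian) below produces exactly those bytes,
   for assemblers that do not know the ffreep mnemonic. */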
13743 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13748 return opno ? "fstp\t%y1" : "fstp\t%y0";
13752 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13753 should be used. UNORDERED_P is true when fucom should be used. */
13756 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13758 int stack_top_dies;
13759 rtx cmp_op0, cmp_op1;
13760 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13764 cmp_op0 = operands[0];
13765 cmp_op1 = operands[1];
13769 cmp_op0 = operands[1];
13770 cmp_op1 = operands[2];
13775 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13776 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13777 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13778 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
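/* Indexing one past the leading 'v' (as below) turns e.g. "vucomiss"
   into the non-AVX spelling "ucomiss", so one template string serves
   both encodings. */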
13780 if (GET_MODE (operands[0]) == SFmode)
13782 return &ucomiss[TARGET_AVX ? 0 : 1];
13784 return &comiss[TARGET_AVX ? 0 : 1];
13787 return &ucomisd[TARGET_AVX ? 0 : 1];
13789 return &comisd[TARGET_AVX ? 0 : 1];
13792 gcc_assert (STACK_TOP_P (cmp_op0));
13794 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13796 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13798 if (stack_top_dies)
13800 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13801 return output_387_ffreep (operands, 1);
13804 return "ftst\n\tfnstsw\t%0";
13807 if (STACK_REG_P (cmp_op1)
13809 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13810 && REGNO (cmp_op1) != FIRST_STACK_REG)
13812 /* If the top of the 387 stack dies, and the other operand
13813 is also a stack register that dies, then this must be a
13814 `fcompp' float compare. */
13818 /* There is no double popping fcomi variant. Fortunately,
13819 eflags is immune from the fstp's cc clobbering. */
13821 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13823 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13824 return output_387_ffreep (operands, 0);
13829 return "fucompp\n\tfnstsw\t%0";
13831 return "fcompp\n\tfnstsw\t%0";
13836 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13838 static const char * const alt[16] =
13840 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13841 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13842 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13843 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13845 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13846 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13850 "fcomi\t{%y1, %0|%0, %y1}",
13851 "fcomip\t{%y1, %0|%0, %y1}",
13852 "fucomi\t{%y1, %0|%0, %y1}",
13853 "fucomip\t{%y1, %0|%0, %y1}",
13864 mask = eflags_p << 3;
13865 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13866 mask |= unordered_p << 1;
13867 mask |= stack_top_dies;
13869 gcc_assert (mask < 16);
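/* For example, eflags_p = 1 with stack_top_dies = 1 yields mask 9,
   selecting "fcomip\t{%y1, %0|%0, %y1}" from the table above. */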
13878 ix86_output_addr_vec_elt (FILE *file, int value)
13880 const char *directive = ASM_LONG;
13884 directive = ASM_QUAD;
13886 gcc_assert (!TARGET_64BIT);
13889 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13893 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13895 const char *directive = ASM_LONG;
13898 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13899 directive = ASM_QUAD;
13901 gcc_assert (!TARGET_64BIT);
13903 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13904 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13905 fprintf (file, "%s%s%d-%s%d\n",
13906 directive, LPREFIX, value, LPREFIX, rel);
13907 else if (HAVE_AS_GOTOFF_IN_DATA)
13908 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13910 else if (TARGET_MACHO)
13912 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13913 machopic_output_function_base_name (file);
13918 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13919 GOT_SYMBOL_NAME, LPREFIX, value);
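/* On a typical ELF target the cases above produce entries like
     .long .L5-.L2
     .long .L5@GOTOFF
     .long _GLOBAL_OFFSET_TABLE_+[.-.L5]
   (label numbers illustrative). */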
13922 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate. */
13926 ix86_expand_clear (rtx dest)
13930 /* We play register width games, which are only valid after reload. */
13931 gcc_assert (reload_completed);
13933 /* Avoid HImode and its attendant prefix byte. */
13934 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13935 dest = gen_rtx_REG (SImode, REGNO (dest));
13936 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
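/* "xor reg, reg" is shorter than "mov $0, reg" (e.g. 2 bytes vs 5 for
   %eax) but clobbers the flags, so the xor form below must carry an
   explicit CLOBBER of FLAGS_REG. */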
13938 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13939 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13941 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13942 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
13948 /* X is an unchanging MEM. If it is a constant pool reference, return
13949 the constant pool rtx, else NULL. */
13952 maybe_get_pool_constant (rtx x)
13954 x = ix86_delegitimize_address (XEXP (x, 0));
13956 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13957 return get_pool_constant (x);
13963 ix86_expand_move (enum machine_mode mode, rtx operands[])
13966 enum tls_model model;
13971 if (GET_CODE (op1) == SYMBOL_REF)
13973 model = SYMBOL_REF_TLS_MODEL (op1);
13976 op1 = legitimize_tls_address (op1, model, true);
13977 op1 = force_operand (op1, op0);
13981 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13982 && SYMBOL_REF_DLLIMPORT_P (op1))
13983 op1 = legitimize_dllimport_symbol (op1, false);
13985 else if (GET_CODE (op1) == CONST
13986 && GET_CODE (XEXP (op1, 0)) == PLUS
13987 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13989 rtx addend = XEXP (XEXP (op1, 0), 1);
13990 rtx symbol = XEXP (XEXP (op1, 0), 0);
13993 model = SYMBOL_REF_TLS_MODEL (symbol);
13995 tmp = legitimize_tls_address (symbol, model, true);
13996 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13997 && SYMBOL_REF_DLLIMPORT_P (symbol))
13998 tmp = legitimize_dllimport_symbol (symbol, true);
14002 tmp = force_operand (tmp, NULL);
14003 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14004 op0, 1, OPTAB_DIRECT);
14010 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14012 if (TARGET_MACHO && !TARGET_64BIT)
14017 rtx temp = ((reload_in_progress
14018 || ((op0 && REG_P (op0))
14020 ? op0 : gen_reg_rtx (Pmode));
14021 op1 = machopic_indirect_data_reference (op1, temp);
14022 op1 = machopic_legitimize_pic_address (op1, mode,
14023 temp == op1 ? 0 : temp);
14025 else if (MACHOPIC_INDIRECT)
14026 op1 = machopic_indirect_data_reference (op1, 0);
14034 op1 = force_reg (Pmode, op1);
14035 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14037 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14038 op1 = legitimize_pic_address (op1, reg);
14047 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14048 || !push_operand (op0, mode))
14050 op1 = force_reg (mode, op1);
14052 if (push_operand (op0, mode)
14053 && ! general_no_elim_operand (op1, mode))
14054 op1 = copy_to_mode_reg (mode, op1);
14056 /* Force large constants in 64bit compilation into a register
14057 to get them CSEed. */
14058 if (can_create_pseudo_p ()
14059 && (mode == DImode) && TARGET_64BIT
14060 && immediate_operand (op1, mode)
14061 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14062 && !register_operand (op0, mode)
14064 op1 = copy_to_mode_reg (mode, op1);
14066 if (can_create_pseudo_p ()
14067 && FLOAT_MODE_P (mode)
14068 && GET_CODE (op1) == CONST_DOUBLE)
14070 /* If we are loading a floating point constant to a register,
14071 force the value to memory now, since we'll get better code
14072 out of the back end. */
14074 op1 = validize_mem (force_const_mem (mode, op1));
14075 if (!register_operand (op0, mode))
14077 rtx temp = gen_reg_rtx (mode);
14078 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14079 emit_move_insn (op0, temp);
14085 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14089 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14091 rtx op0 = operands[0], op1 = operands[1];
14092 unsigned int align = GET_MODE_ALIGNMENT (mode);
14094 /* Force constants other than zero into memory.  We do not know how
14095 the instructions used to build constants modify the upper 64 bits
14096 of the register; once we have that information we may be able
14097 to handle some of them more efficiently. */
14098 if (can_create_pseudo_p ()
14099 && register_operand (op0, mode)
14100 && (CONSTANT_P (op1)
14101 || (GET_CODE (op1) == SUBREG
14102 && CONSTANT_P (SUBREG_REG (op1))))
14103 && !standard_sse_constant_p (op1))
14104 op1 = validize_mem (force_const_mem (mode, op1));
14106 /* We need to check memory alignment for SSE mode since attributes
14107 can make operands unaligned. */
14108 if (can_create_pseudo_p ()
14109 && SSE_REG_MODE_P (mode)
14110 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14111 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14115 /* ix86_expand_vector_move_misalign() does not like constants ... */
14116 if (CONSTANT_P (op1)
14117 || (GET_CODE (op1) == SUBREG
14118 && CONSTANT_P (SUBREG_REG (op1))))
14119 op1 = validize_mem (force_const_mem (mode, op1));
14121 /* ... nor both arguments in memory. */
14122 if (!register_operand (op0, mode)
14123 && !register_operand (op1, mode))
14124 op1 = force_reg (mode, op1);
14126 tmp[0] = op0; tmp[1] = op1;
14127 ix86_expand_vector_move_misalign (mode, tmp);
14131 /* Make operand1 a register if it isn't already. */
14132 if (can_create_pseudo_p ()
14133 && !register_operand (op0, mode)
14134 && !register_operand (op1, mode))
14136 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14140 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14143 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14144 straight to ix86_expand_vector_move. */
14145 /* Code generation for scalar reg-reg moves of single and double precision data:
14146 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14150 if (x86_sse_partial_reg_dependency == true)
14155 Code generation for scalar loads of double precision data:
14156 if (x86_sse_split_regs == true)
14157 movlpd mem, reg (gas syntax)
14161 Code generation for unaligned packed loads of single precision data
14162 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14163 if (x86_sse_unaligned_move_optimal)
14166 if (x86_sse_partial_reg_dependency == true)
14178 Code generation for unaligned packed loads of double precision data
14179 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14180 if (x86_sse_unaligned_move_optimal)
14183 if (x86_sse_split_regs == true)
movlpd mem, reg
movhpd mem+8, reg
else
movsd mem, reg
movhpd mem+8, reg */
14196 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14205 switch (GET_MODE_CLASS (mode))
14207 case MODE_VECTOR_INT:
14209 switch (GET_MODE_SIZE (mode))
14212 /* If we're optimizing for size, movups is the smallest. */
14213 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14215 op0 = gen_lowpart (V4SFmode, op0);
14216 op1 = gen_lowpart (V4SFmode, op1);
14217 emit_insn (gen_avx_movups (op0, op1));
14220 op0 = gen_lowpart (V16QImode, op0);
14221 op1 = gen_lowpart (V16QImode, op1);
14222 emit_insn (gen_avx_movdqu (op0, op1));
14225 op0 = gen_lowpart (V32QImode, op0);
14226 op1 = gen_lowpart (V32QImode, op1);
14227 emit_insn (gen_avx_movdqu256 (op0, op1));
14230 gcc_unreachable ();
14233 case MODE_VECTOR_FLOAT:
14234 op0 = gen_lowpart (mode, op0);
14235 op1 = gen_lowpart (mode, op1);
14240 emit_insn (gen_avx_movups (op0, op1));
14243 emit_insn (gen_avx_movups256 (op0, op1));
14246 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14248 op0 = gen_lowpart (V4SFmode, op0);
14249 op1 = gen_lowpart (V4SFmode, op1);
14250 emit_insn (gen_avx_movups (op0, op1));
14253 emit_insn (gen_avx_movupd (op0, op1));
14256 emit_insn (gen_avx_movupd256 (op0, op1));
14259 gcc_unreachable ();
14264 gcc_unreachable ();
14272 /* If we're optimizing for size, movups is the smallest. */
14273 if (optimize_insn_for_size_p ()
14274 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14276 op0 = gen_lowpart (V4SFmode, op0);
14277 op1 = gen_lowpart (V4SFmode, op1);
14278 emit_insn (gen_sse_movups (op0, op1));
14282 /* ??? If we have typed data, then it would appear that using
14283 movdqu is the only way to get unaligned data loaded with integer type. */
14285 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14287 op0 = gen_lowpart (V16QImode, op0);
14288 op1 = gen_lowpart (V16QImode, op1);
14289 emit_insn (gen_sse2_movdqu (op0, op1));
14293 if (TARGET_SSE2 && mode == V2DFmode)
14297 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14299 op0 = gen_lowpart (V2DFmode, op0);
14300 op1 = gen_lowpart (V2DFmode, op1);
14301 emit_insn (gen_sse2_movupd (op0, op1));
14305 /* When SSE registers are split into halves, we can avoid
14306 writing to the top half twice. */
14307 if (TARGET_SSE_SPLIT_REGS)
14309 emit_clobber (op0);
14314 /* ??? Not sure about the best option for the Intel chips.
14315 The following would seem to satisfy; the register is
14316 entirely cleared, breaking the dependency chain. We
14317 then store to the upper half, with a dependency depth
14318 of one. A rumor has it that Intel recommends two movsd
14319 followed by an unpacklpd, but this is unconfirmed. And
14320 given that the dependency depth of the unpacklpd would
14321 still be one, I'm not sure why this would be better. */
14322 zero = CONST0_RTX (V2DFmode);
14325 m = adjust_address (op1, DFmode, 0);
14326 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14327 m = adjust_address (op1, DFmode, 8);
14328 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14332 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14334 op0 = gen_lowpart (V4SFmode, op0);
14335 op1 = gen_lowpart (V4SFmode, op1);
14336 emit_insn (gen_sse_movups (op0, op1));
14340 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14341 emit_move_insn (op0, CONST0_RTX (mode));
14343 emit_clobber (op0);
14345 if (mode != V4SFmode)
14346 op0 = gen_lowpart (V4SFmode, op0);
14347 m = adjust_address (op1, V2SFmode, 0);
14348 emit_insn (gen_sse_loadlps (op0, op0, m));
14349 m = adjust_address (op1, V2SFmode, 8);
14350 emit_insn (gen_sse_loadhps (op0, op0, m));
14353 else if (MEM_P (op0))
14355 /* If we're optimizing for size, movups is the smallest. */
14356 if (optimize_insn_for_size_p ()
14357 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14359 op0 = gen_lowpart (V4SFmode, op0);
14360 op1 = gen_lowpart (V4SFmode, op1);
14361 emit_insn (gen_sse_movups (op0, op1));
14365 /* ??? Similar to above, only less clear because of quote
14366 typeless stores unquote. */
14367 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14368 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14370 op0 = gen_lowpart (V16QImode, op0);
14371 op1 = gen_lowpart (V16QImode, op1);
14372 emit_insn (gen_sse2_movdqu (op0, op1));
14376 if (TARGET_SSE2 && mode == V2DFmode)
14378 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14380 op0 = gen_lowpart (V2DFmode, op0);
14381 op1 = gen_lowpart (V2DFmode, op1);
14382 emit_insn (gen_sse2_movupd (op0, op1));
14386 m = adjust_address (op0, DFmode, 0);
14387 emit_insn (gen_sse2_storelpd (m, op1));
14388 m = adjust_address (op0, DFmode, 8);
14389 emit_insn (gen_sse2_storehpd (m, op1));
14394 if (mode != V4SFmode)
14395 op1 = gen_lowpart (V4SFmode, op1);
14397 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14399 op0 = gen_lowpart (V4SFmode, op0);
14400 emit_insn (gen_sse_movups (op0, op1));
14404 m = adjust_address (op0, V2SFmode, 0);
14405 emit_insn (gen_sse_storelps (m, op1));
14406 m = adjust_address (op0, V2SFmode, 8);
14407 emit_insn (gen_sse_storehps (m, op1));
14412 gcc_unreachable ();
14415 /* Expand a push in MODE. This is some mode for which we do not support
14416 proper push instructions, at least from the registers that we expect
14417 the value to live in. */
14420 ix86_expand_push (enum machine_mode mode, rtx x)
14424 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14425 GEN_INT (-GET_MODE_SIZE (mode)),
14426 stack_pointer_rtx, 1, OPTAB_DIRECT);
14427 if (tmp != stack_pointer_rtx)
14428 emit_move_insn (stack_pointer_rtx, tmp);
14430 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14432 /* When we push an operand onto the stack, it has to be aligned at least
14433 at the function argument boundary.  However, since we don't have
14434 the argument type, we can't determine the actual argument boundary. */
14436 emit_move_insn (tmp, x);
14439 /* Helper function of ix86_fixup_binary_operands to canonicalize
14440 operand order. Returns true if the operands should be swapped. */
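/* For example, for a commutative PLUS where dst happens to equal src2,
   swapping src1 and src2 makes operand 1 match the destination, which is
   what the two-address x86 instruction forms want. */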
14443 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14446 rtx dst = operands[0];
14447 rtx src1 = operands[1];
14448 rtx src2 = operands[2];
14450 /* If the operation is not commutative, we can't do anything. */
14451 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14454 /* Highest priority is that src1 should match dst. */
14455 if (rtx_equal_p (dst, src1))
14457 if (rtx_equal_p (dst, src2))
14460 /* Next highest priority is that immediate constants come second. */
14461 if (immediate_operand (src2, mode))
14463 if (immediate_operand (src1, mode))
14466 /* Lowest priority is that memory references should come second. */
14476 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14477 destination to use for the operation. If different from the true
14478 destination in operands[0], a copy operation will be required. */
14481 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14484 rtx dst = operands[0];
14485 rtx src1 = operands[1];
14486 rtx src2 = operands[2];
14488 /* Canonicalize operand order. */
14489 if (ix86_swap_binary_operands_p (code, mode, operands))
14493 /* It is invalid to swap operands of different modes. */
14494 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14501 /* Both source operands cannot be in memory. */
14502 if (MEM_P (src1) && MEM_P (src2))
14504 /* Optimization: Only read from memory once. */
14505 if (rtx_equal_p (src1, src2))
14507 src2 = force_reg (mode, src2);
14511 src2 = force_reg (mode, src2);
14514 /* If the destination is memory, and we do not have matching source
14515 operands, do things in registers. */
14516 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14517 dst = gen_reg_rtx (mode);
14519 /* Source 1 cannot be a constant. */
14520 if (CONSTANT_P (src1))
14521 src1 = force_reg (mode, src1);
14523 /* Source 1 cannot be a non-matching memory. */
14524 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14525 src1 = force_reg (mode, src1);
14527 operands[1] = src1;
14528 operands[2] = src2;
14532 /* Similarly, but assume that the destination has already been
14533 set up properly. */
14536 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14537 enum machine_mode mode, rtx operands[])
14539 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14540 gcc_assert (dst == operands[0]);
14543 /* Attempt to expand a binary operator.  Make the expansion closer to the
14544 actual machine than just general_operand, which would allow 3 separate
14545 memory references (one output, two input) in a single insn. */
14548 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14551 rtx src1, src2, dst, op, clob;
14553 dst = ix86_fixup_binary_operands (code, mode, operands);
14554 src1 = operands[1];
14555 src2 = operands[2];
14557 /* Emit the instruction. */
14559 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14560 if (reload_in_progress)
14562 /* Reload doesn't know about the flags register, and doesn't know that
14563 it doesn't want to clobber it. We can only do this with PLUS. */
14564 gcc_assert (code == PLUS);
14569 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14570 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14573 /* Fix up the destination if needed. */
14574 if (dst != operands[0])
14575 emit_move_insn (operands[0], dst);
14578 /* Return TRUE or FALSE depending on whether the binary operator meets the
14579 appropriate constraints. */
14582 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14585 rtx dst = operands[0];
14586 rtx src1 = operands[1];
14587 rtx src2 = operands[2];
14589 /* Both source operands cannot be in memory. */
14590 if (MEM_P (src1) && MEM_P (src2))
14593 /* Canonicalize operand order for commutative operators. */
14594 if (ix86_swap_binary_operands_p (code, mode, operands))
14601 /* If the destination is memory, we must have a matching source operand. */
14602 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14605 /* Source 1 cannot be a constant. */
14606 if (CONSTANT_P (src1))
14609 /* Source 1 cannot be a non-matching memory. */
14610 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14616 /* Attempt to expand a unary operator.  Make the expansion closer to the
14617 actual machine than just general_operand, which would allow 2 separate
14618 memory references (one output, one input) in a single insn. */
14621 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14624 int matching_memory;
14625 rtx src, dst, op, clob;
14630 /* If the destination is memory, and we do not have matching source
14631 operands, do things in registers. */
14632 matching_memory = 0;
14635 if (rtx_equal_p (dst, src))
14636 matching_memory = 1;
14638 dst = gen_reg_rtx (mode);
14641 /* When the source operand is memory, the destination must match. */
14642 if (MEM_P (src) && !matching_memory)
14643 src = force_reg (mode, src);
14645 /* Emit the instruction. */
14647 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14648 if (reload_in_progress || code == NOT)
14650 /* Reload doesn't know about the flags register, and doesn't know that
14651 it doesn't want to clobber it. */
14652 gcc_assert (code == NOT);
14657 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14658 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14661 /* Fix up the destination if needed. */
14662 if (dst != operands[0])
14663 emit_move_insn (operands[0], dst);
14666 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
14667 divisor are within the range [0-255]. */
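/* A sketch of the emitted check (register names illustrative):
     movl  %dividend, %scratch
     orl   %divisor, %scratch
     testl $-0x100, %scratch    # any bits above bit 7 set?
     je    .Lqimode             # no: both fit in [0, 255]
   The QImode path then divides HImode by QImode, leaving the quotient
   in AL and the remainder in AH, as extracted further below. */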
14670 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
14673 rtx end_label, qimode_label;
14674 rtx insn, div, mod;
14675 rtx scratch, tmp0, tmp1, tmp2;
14676 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
14677 rtx (*gen_zero_extend) (rtx, rtx);
14678 rtx (*gen_test_ccno_1) (rtx, rtx);
14683 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
14684 gen_test_ccno_1 = gen_testsi_ccno_1;
14685 gen_zero_extend = gen_zero_extendqisi2;
14688 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
14689 gen_test_ccno_1 = gen_testdi_ccno_1;
14690 gen_zero_extend = gen_zero_extendqidi2;
14693 gcc_unreachable ();
14696 end_label = gen_label_rtx ();
14697 qimode_label = gen_label_rtx ();
14699 scratch = gen_reg_rtx (mode);
14701 /* Use 8bit unsigned divmod if dividend and divisor are within
14702 the range [0-255]. */
14703 emit_move_insn (scratch, operands[2]);
14704 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
14705 scratch, 1, OPTAB_DIRECT);
14706 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
14707 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
14708 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
14709 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
14710 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
14712 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
14713 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14714 JUMP_LABEL (insn) = qimode_label;
14716 /* Generate original signed/unsigned divmod. */
14717 div = gen_divmod4_1 (operands[0], operands[1],
14718 operands[2], operands[3]);
14721 /* Branch to the end. */
14722 emit_jump_insn (gen_jump (end_label));
14725 /* Generate 8bit unsigned divide. */
14726 emit_label (qimode_label);
14727 /* Don't use operands[0] for the result of the 8bit divide since not all
14728 registers support QImode ZERO_EXTRACT. */
14729 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
14730 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
14731 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
14732 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
14736 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
14737 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
14741 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
14742 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
14745 /* Extract remainder from AH. */
14746 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
14747 if (REG_P (operands[1]))
14748 insn = emit_move_insn (operands[1], tmp1);
14751 /* Need a new scratch register since the old one has the result of the 8bit divide. */
14753 scratch = gen_reg_rtx (mode);
14754 emit_move_insn (scratch, tmp1);
14755 insn = emit_move_insn (operands[1], scratch);
14757 set_unique_reg_note (insn, REG_EQUAL, mod);
14759 /* Zero extend quotient from AL. */
14760 tmp1 = gen_lowpart (QImode, tmp0);
14761 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
14762 set_unique_reg_note (insn, REG_EQUAL, div);
14764 emit_label (end_label);
14767 #define LEA_SEARCH_THRESHOLD 12
14769 /* Search backward for a non-agu definition of register number REGNO1
14770 or register number REGNO2 in INSN's basic block until we
14771 1. pass LEA_SEARCH_THRESHOLD instructions, or
14772 2. reach the BB boundary, or
14773 3. reach an agu definition.
14774 Return the distance between the non-agu definition point and INSN.
14775 If there is no definition point, return -1. */
14778 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14781 basic_block bb = BLOCK_FOR_INSN (insn);
14784 enum attr_type insn_type;
14786 if (insn != BB_HEAD (bb))
14788 rtx prev = PREV_INSN (insn);
14789 while (prev && distance < LEA_SEARCH_THRESHOLD)
14791 if (NONDEBUG_INSN_P (prev))
14794 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14795 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14796 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14797 && (regno1 == DF_REF_REGNO (*def_rec)
14798 || regno2 == DF_REF_REGNO (*def_rec)))
14800 insn_type = get_attr_type (prev);
14801 if (insn_type != TYPE_LEA)
14805 if (prev == BB_HEAD (bb))
14807 prev = PREV_INSN (prev);
14811 if (distance < LEA_SEARCH_THRESHOLD)
14815 bool simple_loop = false;
14817 FOR_EACH_EDGE (e, ei, bb->preds)
14820 simple_loop = true;
14826 rtx prev = BB_END (bb);
14829 && distance < LEA_SEARCH_THRESHOLD)
14831 if (NONDEBUG_INSN_P (prev))
14834 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14835 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14836 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14837 && (regno1 == DF_REF_REGNO (*def_rec)
14838 || regno2 == DF_REF_REGNO (*def_rec)))
14840 insn_type = get_attr_type (prev);
14841 if (insn_type != TYPE_LEA)
14845 prev = PREV_INSN (prev);
14853 /* get_attr_type may modify recog data. We want to make sure
14854 that recog data is valid for instruction INSN, on which
14855 distance_non_agu_define is called. INSN is unchanged here. */
14856 extract_insn_cached (insn);
14860 /* Return the distance between INSN and the next insn that uses
14861 register number REGNO0 in a memory address.  Return -1 if no such
14862 use is found within LEA_SEARCH_THRESHOLD, or if REGNO0 is set. */
14865 distance_agu_use (unsigned int regno0, rtx insn)
14867 basic_block bb = BLOCK_FOR_INSN (insn);
14872 if (insn != BB_END (bb))
14874 rtx next = NEXT_INSN (insn);
14875 while (next && distance < LEA_SEARCH_THRESHOLD)
14877 if (NONDEBUG_INSN_P (next))
14881 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14882 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14883 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14884 && regno0 == DF_REF_REGNO (*use_rec))
14886 /* Return DISTANCE if OP0 is used in a memory
14887 address in NEXT. */
14891 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14892 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14893 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14894 && regno0 == DF_REF_REGNO (*def_rec))
14896 /* Return -1 if OP0 is set in NEXT. */
14900 if (next == BB_END (bb))
14902 next = NEXT_INSN (next);
14906 if (distance < LEA_SEARCH_THRESHOLD)
14910 bool simple_loop = false;
14912 FOR_EACH_EDGE (e, ei, bb->succs)
14915 simple_loop = true;
14921 rtx next = BB_HEAD (bb);
14924 && distance < LEA_SEARCH_THRESHOLD)
14926 if (NONDEBUG_INSN_P (next))
14930 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14931 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14932 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14933 && regno0 == DF_REF_REGNO (*use_rec))
14935 /* Return DISTANCE if OP0 is used in a memory
14936 address in NEXT. */
14940 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14941 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14942 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14943 && regno0 == DF_REF_REGNO (*def_rec))
14945 /* Return -1 if OP0 is set in NEXT. */
14950 next = NEXT_INSN (next);
14958 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14959 there is a choice between emitting LEA or ADD.
14960 Negative value: ADD is preferred over LEA
14962 Positive value: LEA is preferred over ADD */
14963 #define IX86_LEA_PRIORITY 2
14965 /* Return true if it is ok to optimize an ADD operation to an LEA
14966 operation to avoid flag register consumption.  For most processors,
14967 ADD is faster than LEA.  For processors like ATOM, if the
14968 destination register of the LEA holds an actual address which will be
14969 used soon, LEA is better; otherwise ADD is better. */
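/* The presumed rationale: on ATOM an LEA executes in the address
   generation unit, so when its result feeds a memory address soon after,
   LEA avoids forwarding the value from the ALU to the AGU; when the
   result is not used as an address, a plain ADD is cheaper. */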
14972 ix86_lea_for_add_ok (rtx insn, rtx operands[])
14974 unsigned int regno0 = true_regnum (operands[0]);
14975 unsigned int regno1 = true_regnum (operands[1]);
14976 unsigned int regno2 = true_regnum (operands[2]);
14978 /* If a = b + c and a != b and a != c, we must use the lea form. */
14979 if (regno0 != regno1 && regno0 != regno2)
14982 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14986 int dist_define, dist_use;
14988 /* Return false if REGNO0 isn't used in a memory address. */
14989 dist_use = distance_agu_use (regno0, insn);
14993 dist_define = distance_non_agu_define (regno1, regno2, insn);
14994 if (dist_define <= 0)
14997 /* If this insn has both a backward non-agu dependence and a forward
14998 agu dependence, the one with the shorter distance takes effect. */
14999 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15006 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY. */
15010 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15016 /* Retrieve destination of SET_BODY. */
15017 switch (GET_CODE (set_body))
15020 set_dest = SET_DEST (set_body);
15021 if (!set_dest || !REG_P (set_dest))
15025 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15026 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15034 /* Retrieve shift count of USE_BODY. */
15035 switch (GET_CODE (use_body))
15038 shift_rtx = XEXP (use_body, 1);
15041 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15042 if (ix86_dep_by_shift_count_body (set_body,
15043 XVECEXP (use_body, 0, i)))
15051 && (GET_CODE (shift_rtx) == ASHIFT
15052 || GET_CODE (shift_rtx) == LSHIFTRT
15053 || GET_CODE (shift_rtx) == ASHIFTRT
15054 || GET_CODE (shift_rtx) == ROTATE
15055 || GET_CODE (shift_rtx) == ROTATERT))
15057 rtx shift_count = XEXP (shift_rtx, 1);
15059 /* Return true if shift count is dest of SET_BODY. */
15060 if (REG_P (shift_count)
15061 && true_regnum (set_dest) == true_regnum (shift_count))
15068 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN. */
15072 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15074 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15075 PATTERN (use_insn));
15078 /* Return TRUE or FALSE depending on whether the unary operator meets the
15079 appropriate constraints. */
15082 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15083 enum machine_mode mode ATTRIBUTE_UNUSED,
15084 rtx operands[2] ATTRIBUTE_UNUSED)
15086 /* If one of the operands is memory, the source and destination must match. */
15087 if ((MEM_P (operands[0])
15088 || MEM_P (operands[1]))
15089 && ! rtx_equal_p (operands[0], operands[1]))
15094 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15095 are ok, keeping in mind the possible movddup alternative. */
15098 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15100 if (MEM_P (operands[0]))
15101 return rtx_equal_p (operands[0], operands[1 + high]);
15102 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15103 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15107 /* Post-reload splitter for converting an SF or DFmode value in an
15108 SSE register into an unsigned SImode. */
15111 ix86_split_convert_uns_si_sse (rtx operands[])
15113 enum machine_mode vecmode;
15114 rtx value, large, zero_or_two31, input, two31, x;
15116 large = operands[1];
15117 zero_or_two31 = operands[2];
15118 input = operands[3];
15119 two31 = operands[4];
15120 vecmode = GET_MODE (large);
15121 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15123 /* Load up the value into the low element. We must ensure that the other
15124 elements are valid floats -- zero is the easiest such value. */
15127 if (vecmode == V4SFmode)
15128 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15130 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15134 input = gen_rtx_REG (vecmode, REGNO (input));
15135 emit_move_insn (value, CONST0_RTX (vecmode));
15136 if (vecmode == V4SFmode)
15137 emit_insn (gen_sse_movss (value, value, input));
15139 emit_insn (gen_sse2_movsd (value, value, input));
15142 emit_move_insn (large, two31);
15143 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15145 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15146 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15148 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15149 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15151 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15152 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15154 large = gen_rtx_REG (V4SImode, REGNO (large));
15155 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15157 x = gen_rtx_REG (V4SImode, REGNO (value));
15158 if (vecmode == V4SFmode)
15159 emit_insn (gen_sse2_cvttps2dq (x, value));
15161 emit_insn (gen_sse2_cvttpd2dq (x, value));
15164 emit_insn (gen_xorv4si3 (value, value, large));
15167 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15168 Expects the 64-bit DImode to be supplied in a pair of integral
15169 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15170 -mfpmath=sse, !optimize_size only. */
15173 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15175 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15176 rtx int_xmm, fp_xmm;
15177 rtx biases, exponents;
15180 int_xmm = gen_reg_rtx (V4SImode);
15181 if (TARGET_INTER_UNIT_MOVES)
15182 emit_insn (gen_movdi_to_sse (int_xmm, input));
15183 else if (TARGET_SSE_SPLIT_REGS)
15185 emit_clobber (int_xmm);
15186 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15190 x = gen_reg_rtx (V2DImode);
15191 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15192 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15195 x = gen_rtx_CONST_VECTOR (V4SImode,
15196 gen_rtvec (4, GEN_INT (0x43300000UL),
15197 GEN_INT (0x45300000UL),
15198 const0_rtx, const0_rtx));
15199 exponents = validize_mem (force_const_mem (V4SImode, x));
15201 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15202 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15204 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15205 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15206 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15207 (0x1.0p84 + double(fp_value_hi_xmm)).
15208 Note these exponents differ by 32. */
15210 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15212 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15213 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15214 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15215 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15216 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15217 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15218 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15219 biases = validize_mem (force_const_mem (V2DFmode, biases));
15220 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15222 /* Add the upper and lower DFmode values together. */
15224 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15227 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15228 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15229 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15232 ix86_expand_vector_extract (false, target, fp_xmm, 0);
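/* Illustrative scalar model of the bias trick above (a sketch, not from
   the original sources).  Reinterpreting 0x43300000:<lo32> as an IEEE
   double gives 0x1.0p52 + lo32, and 0x45300000:<hi32> gives
   0x1.0p84 + hi32 * 0x1.0p32, so:

     double uns_di_to_df (unsigned int lo, unsigned int hi)
     {
       union { unsigned long long u; double d; } x, y;
       x.u = 0x4330000000000000ULL | lo;
       y.u = 0x4530000000000000ULL | hi;
       return (x.d - 0x1.0p52) + (y.d - 0x1.0p84);
     }
*/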
15235 /* Not used, but eases macroization of patterns. */
15237 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15238 rtx input ATTRIBUTE_UNUSED)
15240 gcc_unreachable ();
15243 /* Convert an unsigned SImode value into a DFmode. Currently only used
15244 for SSE, but applicable anywhere. */
15247 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15249 REAL_VALUE_TYPE TWO31r;
15252 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15253 NULL, 1, OPTAB_DIRECT);
15255 fp = gen_reg_rtx (DFmode);
15256 emit_insn (gen_floatsidf2 (fp, x));
15258 real_ldexp (&TWO31r, &dconst1, 31);
15259 x = const_double_from_real_value (TWO31r, DFmode);
15261 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15263 emit_move_insn (target, x);
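/* In scalar terms the expansion above computes, illustratively:
     (double) x == (double) (int) (x - 0x80000000u) + 2147483648.0
   where the SImode addition wraps, i.e. it merely flips the sign bit,
   and both intermediate values are exact in DFmode.  */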
15266 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15267 32-bit mode; otherwise we have a direct convert instruction. */
15270 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15272 REAL_VALUE_TYPE TWO32r;
15273 rtx fp_lo, fp_hi, x;
15275 fp_lo = gen_reg_rtx (DFmode);
15276 fp_hi = gen_reg_rtx (DFmode);
15278 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15280 real_ldexp (&TWO32r, &dconst1, 32);
15281 x = const_double_from_real_value (TWO32r, DFmode);
15282 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15284 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15286 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15289 emit_move_insn (target, x);
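/* Illustratively, for a signed 64-bit value V this computes
     (double) V == (double) (int) (V >> 32) * 0x1.0p32
                   + (double) (unsigned int) V
   with the unsigned low half handled by the SImode expansion above.  */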
15292 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
15293 For x86_32, -mfpmath=sse, !optimize_size only. */
15295 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15297 REAL_VALUE_TYPE ONE16r;
15298 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15300 real_ldexp (&ONE16r, &dconst1, 16);
15301 x = const_double_from_real_value (ONE16r, SFmode);
15302 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15303 NULL, 0, OPTAB_DIRECT);
15304 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15305 NULL, 0, OPTAB_DIRECT);
15306 fp_hi = gen_reg_rtx (SFmode);
15307 fp_lo = gen_reg_rtx (SFmode);
15308 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15309 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15310 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15312 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15314 if (!rtx_equal_p (target, fp_hi))
15315 emit_move_insn (target, fp_hi);
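/* Illustratively: both 16-bit halves are exact in SFmode, so
     (float) x == (float) (x >> 16) * 65536.0f + (float) (x & 0xffff)
   rounds only at the final addition.  */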
15318 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15319 then replicate the value for all elements of the vector
15323 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15330 v = gen_rtvec (4, value, value, value, value);
15331 return gen_rtx_CONST_VECTOR (V4SImode, v);
15335 v = gen_rtvec (2, value, value);
15336 return gen_rtx_CONST_VECTOR (V2DImode, v);
15340 v = gen_rtvec (4, value, value, value, value);
15342 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15343 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15344 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15348 v = gen_rtvec (2, value, value);
15350 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15351 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15354 gcc_unreachable ();
15358 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15359 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15360 for an SSE register. If VECT is true, then replicate the mask for
15361 all elements of the vector register. If INVERT is true, then create
15362 a mask excluding the sign bit. */
15365 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15367 enum machine_mode vec_mode, imode;
15368 HOST_WIDE_INT hi, lo;
15373 /* Find the sign bit, sign extended to 2*HWI. */
15379 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15380 lo = 0x80000000, hi = lo < 0;
15386 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15387 if (HOST_BITS_PER_WIDE_INT >= 64)
15388 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15390 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15395 vec_mode = VOIDmode;
15396 if (HOST_BITS_PER_WIDE_INT >= 64)
15399 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15406 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15410 lo = ~lo, hi = ~hi;
15416 mask = immed_double_const (lo, hi, imode);
15418 vec = gen_rtvec (2, v, mask);
15419 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15420 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15427 gcc_unreachable ();
15431 lo = ~lo, hi = ~hi;
15433 /* Force this value into the low part of an fp vector constant. */
15434 mask = immed_double_const (lo, hi, imode);
15435 mask = gen_lowpart (mode, mask);
15437 if (vec_mode == VOIDmode)
15438 return force_reg (mode, mask);
15440 v = ix86_build_const_vector (mode, vect, mask);
15441 return force_reg (vec_mode, v);
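/* For example, SFmode yields the per-element mask 0x80000000
   (0x7fffffff when INVERT), and DFmode yields 0x8000000000000000
   (0x7fffffffffffffff when INVERT).  */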
15444 /* Generate code for floating point ABS or NEG. */
15447 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15450 rtx mask, set, use, clob, dst, src;
15451 bool use_sse = false;
15452 bool vector_mode = VECTOR_MODE_P (mode);
15453 enum machine_mode elt_mode = mode;
15457 elt_mode = GET_MODE_INNER (mode);
15460 else if (mode == TFmode)
15462 else if (TARGET_SSE_MATH)
15463 use_sse = SSE_FLOAT_MODE_P (mode);
15465 /* NEG and ABS performed with SSE use bitwise mask operations.
15466 Create the appropriate mask now. */
15468 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
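/* E.g. for DFmode, NEG becomes x ^ 0x8000000000000000 and ABS becomes
   x & 0x7fffffffffffffff, matching the XOR/AND choice made below.  */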
15477 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15478 set = gen_rtx_SET (VOIDmode, dst, set);
15483 set = gen_rtx_fmt_e (code, mode, src);
15484 set = gen_rtx_SET (VOIDmode, dst, set);
15487 use = gen_rtx_USE (VOIDmode, mask);
15488 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15489 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15490 gen_rtvec (3, set, use, clob)));
15497 /* Expand a copysign operation. Special case operand 0 being a constant. */
15500 ix86_expand_copysign (rtx operands[])
15502 enum machine_mode mode;
15503 rtx dest, op0, op1, mask, nmask;
15505 dest = operands[0];
15509 mode = GET_MODE (dest);
15511 if (GET_CODE (op0) == CONST_DOUBLE)
15513 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15515 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15516 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15518 if (mode == SFmode || mode == DFmode)
15520 enum machine_mode vmode;
15522 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15524 if (op0 == CONST0_RTX (mode))
15525 op0 = CONST0_RTX (vmode);
15528 rtx v = ix86_build_const_vector (mode, false, op0);
15530 op0 = force_reg (vmode, v);
15533 else if (op0 != CONST0_RTX (mode))
15534 op0 = force_reg (mode, op0);
15536 mask = ix86_build_signbit_mask (mode, 0, 0);
15538 if (mode == SFmode)
15539 copysign_insn = gen_copysignsf3_const;
15540 else if (mode == DFmode)
15541 copysign_insn = gen_copysigndf3_const;
15543 copysign_insn = gen_copysigntf3_const;
15545 emit_insn (copysign_insn (dest, op0, op1, mask));
15549 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15551 nmask = ix86_build_signbit_mask (mode, 0, 1);
15552 mask = ix86_build_signbit_mask (mode, 0, 0);
15554 if (mode == SFmode)
15555 copysign_insn = gen_copysignsf3_var;
15556 else if (mode == DFmode)
15557 copysign_insn = gen_copysigndf3_var;
15559 copysign_insn = gen_copysigntf3_var;
15561 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15565 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15566 be a constant, and so has already been expanded into a vector constant. */
15569 ix86_split_copysign_const (rtx operands[])
15571 enum machine_mode mode, vmode;
15572 rtx dest, op0, mask, x;
15574 dest = operands[0];
15576 mask = operands[3];
15578 mode = GET_MODE (dest);
15579 vmode = GET_MODE (mask);
15581 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15582 x = gen_rtx_AND (vmode, dest, mask);
15583 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15585 if (op0 != CONST0_RTX (vmode))
15587 x = gen_rtx_IOR (vmode, dest, op0);
15588 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
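/* Net effect, sketched: MASK is the bare sign-bit mask and the
   destination is tied to the sign-source operand, so this computes
     dest = (sign_source & MASK) | abs_constant
   which is exactly copysign for a constant magnitude.  */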
15592 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15593 so we have to do two masks. */
15596 ix86_split_copysign_var (rtx operands[])
15598 enum machine_mode mode, vmode;
15599 rtx dest, scratch, op0, op1, mask, nmask, x;
15601 dest = operands[0];
15602 scratch = operands[1];
15605 nmask = operands[4];
15606 mask = operands[5];
15608 mode = GET_MODE (dest);
15609 vmode = GET_MODE (mask);
15611 if (rtx_equal_p (op0, op1))
15613 /* Shouldn't happen often (it's useless, obviously), but when it does
15614 we'd generate incorrect code if we continue below. */
15615 emit_move_insn (dest, op0);
15619 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15621 gcc_assert (REGNO (op1) == REGNO (scratch));
15623 x = gen_rtx_AND (vmode, scratch, mask);
15624 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15627 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15628 x = gen_rtx_NOT (vmode, dest);
15629 x = gen_rtx_AND (vmode, x, op0);
15630 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15634 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15636 x = gen_rtx_AND (vmode, scratch, mask);
15638 else /* alternative 2,4 */
15640 gcc_assert (REGNO (mask) == REGNO (scratch));
15641 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15642 x = gen_rtx_AND (vmode, scratch, op1);
15644 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15646 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15648 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15649 x = gen_rtx_AND (vmode, dest, nmask);
15651 else /* alternative 3,4 */
15653 gcc_assert (REGNO (nmask) == REGNO (dest));
15655 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15656 x = gen_rtx_AND (vmode, dest, op0);
15658 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15661 x = gen_rtx_IOR (vmode, dest, scratch);
15662 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15665 /* Return TRUE or FALSE depending on whether the first SET in INSN
15666 has source and destination with matching CC modes and whether the
15667 CC mode is at least as constrained as REQ_MODE. */
15670 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15673 enum machine_mode set_mode;
15675 set = PATTERN (insn);
15676 if (GET_CODE (set) == PARALLEL)
15677 set = XVECEXP (set, 0, 0);
15678 gcc_assert (GET_CODE (set) == SET);
15679 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15681 set_mode = GET_MODE (SET_DEST (set));
15685 if (req_mode != CCNOmode
15686 && (req_mode != CCmode
15687 || XEXP (SET_SRC (set), 1) != const0_rtx))
15691 if (req_mode == CCGCmode)
15695 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15699 if (req_mode == CCZmode)
15710 gcc_unreachable ();
15713 return GET_MODE (SET_SRC (set)) == set_mode;
15716 /* Generate insn patterns to do an integer compare of OPERANDS. */
15719 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15721 enum machine_mode cmpmode;
15724 cmpmode = SELECT_CC_MODE (code, op0, op1);
15725 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15727 /* This is very simple, but making the interface the same as in the
15728 FP case makes the rest of the code easier. */
15729 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15730 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15732 /* Return the test that should be put into the flags user, i.e.
15733 the bcc, scc, or cmov instruction. */
15734 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15737 /* Figure out whether to use ordered or unordered fp comparisons.
15738 Return the appropriate mode to use. */
15741 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15743 /* ??? In order to make all comparisons reversible, we do all comparisons
15744 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15745 all forms of trapping and nontrapping comparisons, we can make inequality
15746 comparisons trapping again, since it results in better code when using
15747 FCOM based compares. */
15748 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15752 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15754 enum machine_mode mode = GET_MODE (op0);
15756 if (SCALAR_FLOAT_MODE_P (mode))
15758 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15759 return ix86_fp_compare_mode (code);
15764 /* Only zero flag is needed. */
15765 case EQ: /* ZF=0 */
15766 case NE: /* ZF!=0 */
15768 /* Codes needing carry flag. */
15769 case GEU: /* CF=0 */
15770 case LTU: /* CF=1 */
15771 /* Detect overflow checks. They need just the carry flag. */
15772 if (GET_CODE (op0) == PLUS
15773 && rtx_equal_p (op1, XEXP (op0, 0)))
15777 case GTU: /* CF=0 & ZF=0 */
15778 case LEU: /* CF=1 | ZF=1 */
15779 /* Detect overflow checks. They need just the carry flag. */
15780 if (GET_CODE (op0) == MINUS
15781 && rtx_equal_p (op1, XEXP (op0, 0)))
15785 /* Codes possibly doable only with the sign flag when
15786 comparing against zero. */
15787 case GE: /* SF=OF or SF=0 */
15788 case LT: /* SF<>OF or SF=1 */
15789 if (op1 == const0_rtx)
15792 /* For other cases the carry flag is not required. */
15794 /* Codes doable only with the sign flag when comparing
15795 against zero, but we lack a jump instruction for it,
15796 so we need to use relational tests against overflow,
15797 which thus needs to be zero. */
15798 case GT: /* ZF=0 & SF=OF */
15799 case LE: /* ZF=1 | SF<>OF */
15800 if (op1 == const0_rtx)
15804 /* The strcmp pattern does (use flags), and combine may ask us for a proper
15809 gcc_unreachable ();
15813 /* Return the fixed registers used for condition codes. */
15816 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15823 /* If two condition code modes are compatible, return a condition code
15824 mode which is compatible with both. Otherwise, return
15827 static enum machine_mode
15828 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15833 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15836 if ((m1 == CCGCmode && m2 == CCGOCmode)
15837 || (m1 == CCGOCmode && m2 == CCGCmode))
15843 gcc_unreachable ();
15873 /* These are only compatible with themselves, which we already
15880 /* Return a comparison we can do that is equivalent to
15881 swap_condition (code), apart possibly from orderedness.
15882 But never change orderedness if TARGET_IEEE_FP, returning
15883 UNKNOWN in that case if necessary. */
15885 static enum rtx_code
15886 ix86_fp_swap_condition (enum rtx_code code)
15890 case GT: /* GTU - CF=0 & ZF=0 */
15891 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15892 case GE: /* GEU - CF=0 */
15893 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15894 case UNLT: /* LTU - CF=1 */
15895 return TARGET_IEEE_FP ? UNKNOWN : GT;
15896 case UNLE: /* LEU - CF=1 | ZF=1 */
15897 return TARGET_IEEE_FP ? UNKNOWN : GE;
15899 return swap_condition (code);
15903 /* Return the cost of comparison CODE using the best strategy for performance.
15904 All of the following functions use the number of instructions as the cost metric.
15905 In the future this should be tweaked to compute bytes for optimize_size and
15906 take into account the performance of various instructions on various CPUs. */
15909 ix86_fp_comparison_cost (enum rtx_code code)
15913 /* The cost of code using bit-twiddling on %ah. */
15930 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15934 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15937 gcc_unreachable ();
15940 switch (ix86_fp_comparison_strategy (code))
15942 case IX86_FPCMP_COMI:
15943 return arith_cost > 4 ? 3 : 2;
15944 case IX86_FPCMP_SAHF:
15945 return arith_cost > 4 ? 4 : 3;
15951 /* Return the strategy to use for floating-point comparisons. We assume that
15952 fcomi is always preferable where available, since that is also true when looking at size
15953 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15955 enum ix86_fpcmp_strategy
15956 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15958 /* Do fcomi/sahf based test when profitable. */
15961 return IX86_FPCMP_COMI;
15963 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15964 return IX86_FPCMP_SAHF;
15966 return IX86_FPCMP_ARITH;
15969 /* Swap, force into registers, or otherwise massage the two operands
15970 to an fp comparison. The operands are updated in place; the new
15971 comparison code is returned. */
15973 static enum rtx_code
15974 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15976 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15977 rtx op0 = *pop0, op1 = *pop1;
15978 enum machine_mode op_mode = GET_MODE (op0);
15979 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15981 /* All of the unordered compare instructions only work on registers.
15982 The same is true of the fcomi compare instructions. The XFmode
15983 compare instructions require registers except when comparing
15984 against zero or when converting operand 1 from fixed point to
15988 && (fpcmp_mode == CCFPUmode
15989 || (op_mode == XFmode
15990 && ! (standard_80387_constant_p (op0) == 1
15991 || standard_80387_constant_p (op1) == 1)
15992 && GET_CODE (op1) != FLOAT)
15993 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15995 op0 = force_reg (op_mode, op0);
15996 op1 = force_reg (op_mode, op1);
16000 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16001 things around if they appear profitable, otherwise force op0
16002 into a register. */
16004 if (standard_80387_constant_p (op0) == 0
16006 && ! (standard_80387_constant_p (op1) == 0
16009 enum rtx_code new_code = ix86_fp_swap_condition (code);
16010 if (new_code != UNKNOWN)
16013 tmp = op0, op0 = op1, op1 = tmp;
16019 op0 = force_reg (op_mode, op0);
16021 if (CONSTANT_P (op1))
16023 int tmp = standard_80387_constant_p (op1);
16025 op1 = validize_mem (force_const_mem (op_mode, op1));
16029 op1 = force_reg (op_mode, op1);
16032 op1 = force_reg (op_mode, op1);
16036 /* Try to rearrange the comparison to make it cheaper. */
16037 if (ix86_fp_comparison_cost (code)
16038 > ix86_fp_comparison_cost (swap_condition (code))
16039 && (REG_P (op1) || can_create_pseudo_p ()))
16042 tmp = op0, op0 = op1, op1 = tmp;
16043 code = swap_condition (code);
16045 op0 = force_reg (op_mode, op0);
16053 /* Convert comparison codes we use to represent FP comparison to integer
16054 code that will result in a proper branch. Return UNKNOWN if no such code
16058 ix86_fp_compare_code_to_integer (enum rtx_code code)
16087 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16090 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16092 enum machine_mode fpcmp_mode, intcmp_mode;
16095 fpcmp_mode = ix86_fp_compare_mode (code);
16096 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16098 /* Do fcomi/sahf based test when profitable. */
16099 switch (ix86_fp_comparison_strategy (code))
16101 case IX86_FPCMP_COMI:
16102 intcmp_mode = fpcmp_mode;
16103 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16104 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16109 case IX86_FPCMP_SAHF:
16110 intcmp_mode = fpcmp_mode;
16111 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16112 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16116 scratch = gen_reg_rtx (HImode);
16117 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16118 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16121 case IX86_FPCMP_ARITH:
16122 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16123 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16124 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16126 scratch = gen_reg_rtx (HImode);
16127 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16129 /* In the unordered case, we have to check C2 for NaNs, which
16130 doesn't happen to work out to anything nice combination-wise.
16131 So do some bit twiddling on the value we've got in AH to come
16132 up with an appropriate set of condition codes. */
16134 intcmp_mode = CCNOmode;
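/* After fnstsw, AH holds C0/C2/C3 at bits 0x01/0x04/0x40 (C1 is 0x02).
   fcom sets C3:C2:C0 to 000 (>), 001 (<), 100 (=) or 111 (unordered),
   which is where the 0x45, 0x44, 0x05 and 0x40 masks below come from.  */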
16139 if (code == GT || !TARGET_IEEE_FP)
16141 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16146 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16147 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16148 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16149 intcmp_mode = CCmode;
16155 if (code == LT && TARGET_IEEE_FP)
16157 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16158 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16159 intcmp_mode = CCmode;
16164 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16170 if (code == GE || !TARGET_IEEE_FP)
16172 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16177 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16178 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16184 if (code == LE && TARGET_IEEE_FP)
16186 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16187 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16188 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16189 intcmp_mode = CCmode;
16194 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16200 if (code == EQ && TARGET_IEEE_FP)
16202 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16203 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16204 intcmp_mode = CCmode;
16209 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16215 if (code == NE && TARGET_IEEE_FP)
16217 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16218 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16224 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16230 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16239 gcc_unreachable ();
16247 /* Return the test that should be put into the flags user, i.e.
16248 the bcc, scc, or cmov instruction. */
16249 return gen_rtx_fmt_ee (code, VOIDmode,
16250 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16255 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16259 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16260 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16262 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16264 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16265 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16268 ret = ix86_expand_int_compare (code, op0, op1);
16274 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16278 switch (GET_MODE (op0))
16287 tmp = ix86_expand_compare (code, op0, op1);
16288 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16289 gen_rtx_LABEL_REF (VOIDmode, label),
16291 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16298 /* Expand DImode branch into multiple compare+branch. */
16300 rtx lo[2], hi[2], label2;
16301 enum rtx_code code1, code2, code3;
16302 enum machine_mode submode;
16304 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16306 tmp = op0, op0 = op1, op1 = tmp;
16307 code = swap_condition (code);
16309 if (GET_MODE (op0) == DImode)
16311 split_di (&op0, 1, lo+0, hi+0);
16312 split_di (&op1, 1, lo+1, hi+1);
16317 split_ti (&op0, 1, lo+0, hi+0);
16318 split_ti (&op1, 1, lo+1, hi+1);
16322 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16323 avoid two branches. This costs one extra insn, so disable when
16324 optimizing for size. */
16326 if ((code == EQ || code == NE)
16327 && (!optimize_insn_for_size_p ()
16328 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16333 if (hi[1] != const0_rtx)
16334 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16335 NULL_RTX, 0, OPTAB_WIDEN);
16338 if (lo[1] != const0_rtx)
16339 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16340 NULL_RTX, 0, OPTAB_WIDEN);
16342 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16343 NULL_RTX, 0, OPTAB_WIDEN);
16345 ix86_expand_branch (code, tmp, const0_rtx, label);
16349 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
16350 op1 is a constant, and the low word is zero, then we can just
16351 examine the high word. Similarly for a low word of -1 and
16352 a less-or-equal or greater-than comparison. */
16354 if (CONST_INT_P (hi[1]))
16357 case LT: case LTU: case GE: case GEU:
16358 if (lo[1] == const0_rtx)
16360 ix86_expand_branch (code, hi[0], hi[1], label);
16364 case LE: case LEU: case GT: case GTU:
16365 if (lo[1] == constm1_rtx)
16367 ix86_expand_branch (code, hi[0], hi[1], label);
16375 /* Otherwise, we need two or three jumps. */
16377 label2 = gen_label_rtx ();
16380 code2 = swap_condition (code);
16381 code3 = unsigned_condition (code);
16385 case LT: case GT: case LTU: case GTU:
16388 case LE: code1 = LT; code2 = GT; break;
16389 case GE: code1 = GT; code2 = LT; break;
16390 case LEU: code1 = LTU; code2 = GTU; break;
16391 case GEU: code1 = GTU; code2 = LTU; break;
16393 case EQ: code1 = UNKNOWN; code2 = NE; break;
16394 case NE: code2 = UNKNOWN; break;
16397 gcc_unreachable ();
16402 * if (hi(a) < hi(b)) goto true;
16403 * if (hi(a) > hi(b)) goto false;
16404 * if (lo(a) < lo(b)) goto true;
16408 if (code1 != UNKNOWN)
16409 ix86_expand_branch (code1, hi[0], hi[1], label);
16410 if (code2 != UNKNOWN)
16411 ix86_expand_branch (code2, hi[0], hi[1], label2);
16413 ix86_expand_branch (code3, lo[0], lo[1], label);
16415 if (code2 != UNKNOWN)
16416 emit_label (label2);
16421 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16426 /* Split branch based on floating point condition. */
16428 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16429 rtx target1, rtx target2, rtx tmp, rtx pushed)
16434 if (target2 != pc_rtx)
16437 code = reverse_condition_maybe_unordered (code);
16442 condition = ix86_expand_fp_compare (code, op1, op2,
16445 /* Remove pushed operand from stack. */
16447 ix86_free_from_memory (GET_MODE (pushed));
16449 i = emit_jump_insn (gen_rtx_SET
16451 gen_rtx_IF_THEN_ELSE (VOIDmode,
16452 condition, target1, target2)));
16453 if (split_branch_probability >= 0)
16454 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16458 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16462 gcc_assert (GET_MODE (dest) == QImode);
16464 ret = ix86_expand_compare (code, op0, op1);
16465 PUT_MODE (ret, QImode);
16466 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16469 /* Expand comparison setting or clearing carry flag. Return true when
16470 successful and set pop for the operation. */
16472 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16474 enum machine_mode mode =
16475 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16477 /* Do not handle DImode compares that go through the special path. */
16478 if (mode == (TARGET_64BIT ? TImode : DImode))
16481 if (SCALAR_FLOAT_MODE_P (mode))
16483 rtx compare_op, compare_seq;
16485 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16487 /* Shortcut: the following common codes never translate
16488 into carry flag compares. */
16489 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16490 || code == ORDERED || code == UNORDERED)
16493 /* These comparisons require the zero flag; swap operands so they won't. */
16494 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16495 && !TARGET_IEEE_FP)
16500 code = swap_condition (code);
16503 /* Try to expand the comparison and verify that we end up with
16504 a carry flag based comparison. This fails to be true only when
16505 we decide to expand the comparison using arithmetic, which is
16506 not a common scenario. */
16508 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16509 compare_seq = get_insns ();
16512 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16513 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16514 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16516 code = GET_CODE (compare_op);
16518 if (code != LTU && code != GEU)
16521 emit_insn (compare_seq);
16526 if (!INTEGRAL_MODE_P (mode))
16535 /* Convert a==0 into (unsigned)a<1. */
16538 if (op1 != const0_rtx)
16541 code = (code == EQ ? LTU : GEU);
16544 /* Convert a>b into b<a or a>=b+1. */
16547 if (CONST_INT_P (op1))
16549 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16550 /* Bail out on overflow. We could still swap the operands, but that
16551 would force loading of the constant into a register. */
16552 if (op1 == const0_rtx
16553 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16555 code = (code == GTU ? GEU : LTU);
16562 code = (code == GTU ? LTU : GEU);
16566 /* Convert a>=0 into (unsigned)a<0x80000000. */
16569 if (mode == DImode || op1 != const0_rtx)
16571 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16572 code = (code == LT ? GEU : LTU);
16576 if (mode == DImode || op1 != constm1_rtx)
16578 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16579 code = (code == LE ? GEU : LTU);
16585 /* Swapping the operands may cause a constant to appear as the first operand. */
16586 if (!nonimmediate_operand (op0, VOIDmode))
16588 if (!can_create_pseudo_p ())
16590 op0 = force_reg (mode, op0);
16592 *pop = ix86_expand_compare (code, op0, op1);
16593 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16598 ix86_expand_int_movcc (rtx operands[])
16600 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16601 rtx compare_seq, compare_op;
16602 enum machine_mode mode = GET_MODE (operands[0]);
16603 bool sign_bit_compare_p = false;
16604 rtx op0 = XEXP (operands[1], 0);
16605 rtx op1 = XEXP (operands[1], 1);
16608 compare_op = ix86_expand_compare (code, op0, op1);
16609 compare_seq = get_insns ();
16612 compare_code = GET_CODE (compare_op);
16614 if ((op1 == const0_rtx && (code == GE || code == LT))
16615 || (op1 == constm1_rtx && (code == GT || code == LE)))
16616 sign_bit_compare_p = true;
16618 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16619 HImode insns, we'd be swallowed in word prefix ops. */
16621 if ((mode != HImode || TARGET_FAST_PREFIX)
16622 && (mode != (TARGET_64BIT ? TImode : DImode))
16623 && CONST_INT_P (operands[2])
16624 && CONST_INT_P (operands[3]))
16626 rtx out = operands[0];
16627 HOST_WIDE_INT ct = INTVAL (operands[2]);
16628 HOST_WIDE_INT cf = INTVAL (operands[3]);
16629 HOST_WIDE_INT diff;
16632 /* Sign bit compares are better done using shifts than by using
16634 if (sign_bit_compare_p
16635 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
16637 /* Detect overlap between destination and compare sources. */
16640 if (!sign_bit_compare_p)
16643 bool fpcmp = false;
16645 compare_code = GET_CODE (compare_op);
16647 flags = XEXP (compare_op, 0);
16649 if (GET_MODE (flags) == CCFPmode
16650 || GET_MODE (flags) == CCFPUmode)
16654 = ix86_fp_compare_code_to_integer (compare_code);
16657 /* To simplify the rest of the code, restrict to the GEU case. */
16658 if (compare_code == LTU)
16660 HOST_WIDE_INT tmp = ct;
16663 compare_code = reverse_condition (compare_code);
16664 code = reverse_condition (code);
16669 PUT_CODE (compare_op,
16670 reverse_condition_maybe_unordered
16671 (GET_CODE (compare_op)));
16673 PUT_CODE (compare_op,
16674 reverse_condition (GET_CODE (compare_op)));
16678 if (reg_overlap_mentioned_p (out, op0)
16679 || reg_overlap_mentioned_p (out, op1))
16680 tmp = gen_reg_rtx (mode);
16682 if (mode == DImode)
16683 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16685 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16686 flags, compare_op));
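/* The movcc_0_m1 patterns emit, roughly, "sbb reg,reg": the carry left
   by the comparison becomes all-ones (-1) when the condition holds and
   0 otherwise; the arithmetic below then maps -1/0 onto ct/cf.  */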
16690 if (code == GT || code == GE)
16691 code = reverse_condition (code);
16694 HOST_WIDE_INT tmp = ct;
16699 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
16712 tmp = expand_simple_binop (mode, PLUS,
16714 copy_rtx (tmp), 1, OPTAB_DIRECT);
16725 tmp = expand_simple_binop (mode, IOR,
16727 copy_rtx (tmp), 1, OPTAB_DIRECT);
16729 else if (diff == -1 && ct)
16739 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16741 tmp = expand_simple_binop (mode, PLUS,
16742 copy_rtx (tmp), GEN_INT (cf),
16743 copy_rtx (tmp), 1, OPTAB_DIRECT);
16751 * andl cf - ct, dest
16761 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16764 tmp = expand_simple_binop (mode, AND,
16766 gen_int_mode (cf - ct, mode),
16767 copy_rtx (tmp), 1, OPTAB_DIRECT);
16769 tmp = expand_simple_binop (mode, PLUS,
16770 copy_rtx (tmp), GEN_INT (ct),
16771 copy_rtx (tmp), 1, OPTAB_DIRECT);
16774 if (!rtx_equal_p (tmp, out))
16775 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16782 enum machine_mode cmp_mode = GET_MODE (op0);
16785 tmp = ct, ct = cf, cf = tmp;
16788 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16790 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16792 /* We may be reversing an unordered compare to a normal compare, which
16793 is not valid in general (we may convert a non-trapping condition
16794 to a trapping one); however, on i386 we currently emit all
16795 comparisons unordered. */
16796 compare_code = reverse_condition_maybe_unordered (compare_code);
16797 code = reverse_condition_maybe_unordered (code);
16801 compare_code = reverse_condition (compare_code);
16802 code = reverse_condition (code);
16806 compare_code = UNKNOWN;
16807 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
16808 && CONST_INT_P (op1))
16810 if (op1 == const0_rtx
16811 && (code == LT || code == GE))
16812 compare_code = code;
16813 else if (op1 == constm1_rtx)
16817 else if (code == GT)
16822 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16823 if (compare_code != UNKNOWN
16824 && GET_MODE (op0) == GET_MODE (out)
16825 && (cf == -1 || ct == -1))
16827 /* If lea code below could be used, only optimize
16828 if it results in a 2 insn sequence. */
16830 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16831 || diff == 3 || diff == 5 || diff == 9)
16832 || (compare_code == LT && ct == -1)
16833 || (compare_code == GE && cf == -1))
16836 * notl op1 (if necessary)
16844 code = reverse_condition (code);
16847 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16849 out = expand_simple_binop (mode, IOR,
16851 out, 1, OPTAB_DIRECT);
16852 if (out != operands[0])
16853 emit_move_insn (operands[0], out);
16860 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16861 || diff == 3 || diff == 5 || diff == 9)
16862 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16864 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16870 * lea cf(dest*(ct-cf)),dest
16874 * This also catches the degenerate setcc-only case.
16880 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16883 /* On x86_64 the lea instruction operates on Pmode, so we need
16884 to get the arithmetic done in the proper mode to match. */
16886 tmp = copy_rtx (out);
16890 out1 = copy_rtx (out);
16891 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16895 tmp = gen_rtx_PLUS (mode, tmp, out1);
16901 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16904 if (!rtx_equal_p (tmp, out))
16907 out = force_operand (tmp, copy_rtx (out));
16909 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16911 if (!rtx_equal_p (out, operands[0]))
16912 emit_move_insn (operands[0], copy_rtx (out));
16918 * General case: Jumpful:
16919 * xorl dest,dest cmpl op1, op2
16920 * cmpl op1, op2 movl ct, dest
16921 * setcc dest jcc 1f
16922 * decl dest movl cf, dest
16923 * andl (cf-ct),dest 1:
16926 * Size 20. Size 14.
16928 * This is reasonably steep, but branch mispredict costs are
16929 * high on modern CPUs, so consider failing only if optimizing
16933 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16934 && BRANCH_COST (optimize_insn_for_speed_p (),
16939 enum machine_mode cmp_mode = GET_MODE (op0);
16944 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16946 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16948 /* We may be reversing an unordered compare to a normal compare,
16949 which is not valid in general (we may convert a non-trapping
16950 condition to a trapping one); however, on i386 we currently
16951 emit all comparisons unordered. */
16952 code = reverse_condition_maybe_unordered (code);
16956 code = reverse_condition (code);
16957 if (compare_code != UNKNOWN)
16958 compare_code = reverse_condition (compare_code);
16962 if (compare_code != UNKNOWN)
16964 /* notl op1 (if needed)
16969 For x < 0 (resp. x <= -1) there will be no notl,
16970 so if possible swap the constants to get rid of the
16972 True/false will be -1/0 while code below (store flag
16973 followed by decrement) is 0/-1, so the constants need
16974 to be exchanged once more. */
16976 if (compare_code == GE || !cf)
16978 code = reverse_condition (code);
16983 HOST_WIDE_INT tmp = cf;
16988 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16992 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16994 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
16996 copy_rtx (out), 1, OPTAB_DIRECT);
16999 out = expand_simple_binop (mode, AND, copy_rtx (out),
17000 gen_int_mode (cf - ct, mode),
17001 copy_rtx (out), 1, OPTAB_DIRECT);
17003 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17004 copy_rtx (out), 1, OPTAB_DIRECT);
17005 if (!rtx_equal_p (out, operands[0]))
17006 emit_move_insn (operands[0], copy_rtx (out));
17012 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17014 /* Try a few more things with specific constants and a variable. */
17017 rtx var, orig_out, out, tmp;
17019 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17022 /* If one of the two operands is an interesting constant, load a
17023 constant with the above and mask it in with a logical operation. */
17025 if (CONST_INT_P (operands[2]))
17028 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17029 operands[3] = constm1_rtx, op = and_optab;
17030 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17031 operands[3] = const0_rtx, op = ior_optab;
17035 else if (CONST_INT_P (operands[3]))
17038 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17039 operands[2] = constm1_rtx, op = and_optab;
17040 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
17041 operands[2] = const0_rtx, op = ior_optab;
17048 orig_out = operands[0];
17049 tmp = gen_reg_rtx (mode);
17052 /* Recurse to get the constant loaded. */
17053 if (ix86_expand_int_movcc (operands) == 0)
17056 /* Mask in the interesting variable. */
17057 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17059 if (!rtx_equal_p (out, orig_out))
17060 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17066 * For comparison with above,
17076 if (! nonimmediate_operand (operands[2], mode))
17077 operands[2] = force_reg (mode, operands[2]);
17078 if (! nonimmediate_operand (operands[3], mode))
17079 operands[3] = force_reg (mode, operands[3]);
17081 if (! register_operand (operands[2], VOIDmode)
17083 || ! register_operand (operands[3], VOIDmode)))
17084 operands[2] = force_reg (mode, operands[2]);
17087 && ! register_operand (operands[3], VOIDmode))
17088 operands[3] = force_reg (mode, operands[3]);
17090 emit_insn (compare_seq);
17091 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17092 gen_rtx_IF_THEN_ELSE (mode,
17093 compare_op, operands[2],
17098 /* Swap, force into registers, or otherwise massage the two operands
17099 to an SSE comparison with a mask result. Thus we differ a bit from
17100 ix86_prepare_fp_compare_args which expects to produce a flags result.
17102 The DEST operand exists to help determine whether to commute commutative
17103 operators. The POP0/POP1 operands are updated in place. The new
17104 comparison code is returned, or UNKNOWN if not implementable. */
17106 static enum rtx_code
17107 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17108 rtx *pop0, rtx *pop1)
17116 /* We have no LTGT as an operator. We could implement it with
17117 NE & ORDERED, but this requires an extra temporary. It's
17118 not clear that it's worth it. */
17125 /* These are supported directly. */
17132 /* For commutative operators, try to canonicalize the destination
17133 operand to be first in the comparison - this helps reload to
17134 avoid extra moves. */
17135 if (!dest || !rtx_equal_p (dest, *pop1))
17143 /* These are not supported directly. Swap the comparison operands
17144 to transform into something that is supported. */
17148 code = swap_condition (code);
17152 gcc_unreachable ();
17158 /* Detect conditional moves that exactly match min/max operational
17159 semantics. Note that this is IEEE safe, as long as we don't
17160 interchange the operands.
17162 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17163 and TRUE if the operation is successful and instructions are emitted. */
17166 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17167 rtx cmp_op1, rtx if_true, rtx if_false)
17169 enum machine_mode mode;
17175 else if (code == UNGE)
17178 if_true = if_false;
17184 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17186 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17191 mode = GET_MODE (dest);
17193 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17194 but MODE may be a vector mode and thus not appropriate. */
17195 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17197 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17200 if_true = force_reg (mode, if_true);
17201 v = gen_rtvec (2, if_true, if_false);
17202 tmp = gen_rtx_UNSPEC (mode, v, u);
17206 code = is_min ? SMIN : SMAX;
17207 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17210 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17214 /* Expand an SSE vector comparison. Return the register with the result. */
17217 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17218 rtx op_true, rtx op_false)
17220 enum machine_mode mode = GET_MODE (dest);
17223 cmp_op0 = force_reg (mode, cmp_op0);
17224 if (!nonimmediate_operand (cmp_op1, mode))
17225 cmp_op1 = force_reg (mode, cmp_op1);
17228 || reg_overlap_mentioned_p (dest, op_true)
17229 || reg_overlap_mentioned_p (dest, op_false))
17230 dest = gen_reg_rtx (mode);
17232 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17233 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17238 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17239 operations. This is used for both scalar and vector conditional moves. */
17242 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17244 enum machine_mode mode = GET_MODE (dest);
17247 if (op_false == CONST0_RTX (mode))
17249 op_true = force_reg (mode, op_true);
17250 x = gen_rtx_AND (mode, cmp, op_true);
17251 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17253 else if (op_true == CONST0_RTX (mode))
17255 op_false = force_reg (mode, op_false);
17256 x = gen_rtx_NOT (mode, cmp);
17257 x = gen_rtx_AND (mode, x, op_false);
17258 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17260 else if (TARGET_XOP)
17262 rtx pcmov = gen_rtx_SET (mode, dest,
17263 gen_rtx_IF_THEN_ELSE (mode, cmp,
17270 op_true = force_reg (mode, op_true);
17271 op_false = force_reg (mode, op_false);
17273 t2 = gen_reg_rtx (mode);
17275 t3 = gen_reg_rtx (mode);
17279 x = gen_rtx_AND (mode, op_true, cmp);
17280 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17282 x = gen_rtx_NOT (mode, cmp);
17283 x = gen_rtx_AND (mode, x, op_false);
17284 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17286 x = gen_rtx_IOR (mode, t3, t2);
17287 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
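/* I.e. the general case above computes, sketched in C terms:
     dest = (cmp & op_true) | (~cmp & op_false)
   the classic AND/ANDN/OR select on a mask of all-ones or all-zeros
   elements.  */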
17291 /* Expand a floating-point conditional move. Return true if successful. */
17294 ix86_expand_fp_movcc (rtx operands[])
17296 enum machine_mode mode = GET_MODE (operands[0]);
17297 enum rtx_code code = GET_CODE (operands[1]);
17298 rtx tmp, compare_op;
17299 rtx op0 = XEXP (operands[1], 0);
17300 rtx op1 = XEXP (operands[1], 1);
17302 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17304 enum machine_mode cmode;
17306 /* Since we have no cmove for SSE registers, don't force bad register
17307 allocation just to gain access to it. Deny movcc when the
17308 comparison mode doesn't match the move mode. */
17309 cmode = GET_MODE (op0);
17310 if (cmode == VOIDmode)
17311 cmode = GET_MODE (op1);
17315 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17316 if (code == UNKNOWN)
17319 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17320 operands[2], operands[3]))
17323 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17324 operands[2], operands[3]);
17325 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17329 /* The floating point conditional move instructions don't directly
17330 support conditions resulting from a signed integer comparison. */
17332 compare_op = ix86_expand_compare (code, op0, op1);
17333 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17335 tmp = gen_reg_rtx (QImode);
17336 ix86_expand_setcc (tmp, code, op0, op1);
17338 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17341 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17342 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17343 operands[2], operands[3])));
17348 /* Expand a floating-point vector conditional move; a vcond operation
17349 rather than a movcc operation. */
17352 ix86_expand_fp_vcond (rtx operands[])
17354 enum rtx_code code = GET_CODE (operands[3]);
17357 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17358 &operands[4], &operands[5]);
17359 if (code == UNKNOWN)
17362 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17363 operands[5], operands[1], operands[2]))
17366 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17367 operands[1], operands[2]);
17368 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17372 /* Expand a signed/unsigned integral vector conditional move. */
17375 ix86_expand_int_vcond (rtx operands[])
17377 enum machine_mode mode = GET_MODE (operands[0]);
17378 enum rtx_code code = GET_CODE (operands[3]);
17379 bool negate = false;
17382 cop0 = operands[4];
17383 cop1 = operands[5];
17385 /* XOP supports all of the comparisons on all vector int types. */
17388 /* Canonicalize the comparison to EQ, GT, GTU. */
17399 code = reverse_condition (code);
17405 code = reverse_condition (code);
17411 code = swap_condition (code);
17412 x = cop0, cop0 = cop1, cop1 = x;
17416 gcc_unreachable ();
17419 /* Only SSE4.1/SSE4.2 support V2DImode. */
17420 if (mode == V2DImode)
17425 /* SSE4.1 supports EQ. */
17426 if (!TARGET_SSE4_1)
17432 /* SSE4.2 supports GT/GTU. */
17433 if (!TARGET_SSE4_2)
17438 gcc_unreachable ();
17442 /* Unsigned parallel compare is not supported by the hardware.
17443 Play some tricks to turn this into a signed comparison
17447 cop0 = force_reg (mode, cop0);
17455 rtx (*gen_sub3) (rtx, rtx, rtx);
17457 /* Subtract (-(INT MAX) - 1) from both operands to make
17459 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17461 gen_sub3 = (mode == V4SImode
17462 ? gen_subv4si3 : gen_subv2di3);
17463 t1 = gen_reg_rtx (mode);
17464 emit_insn (gen_sub3 (t1, cop0, mask));
17466 t2 = gen_reg_rtx (mode);
17467 emit_insn (gen_sub3 (t2, cop1, mask));
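/* Subtracting the sign-bit constant flips the top bit of each element
   (the subtraction wraps), exploiting the identity
     x >u y  <=>  (x ^ 0x80000000) >s (y ^ 0x80000000)
   so the signed comparison emitted below implements the unsigned one.  */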
17477 /* Perform a parallel unsigned saturating subtraction. */
17478 x = gen_reg_rtx (mode);
17479 emit_insn (gen_rtx_SET (VOIDmode, x,
17480 gen_rtx_US_MINUS (mode, cop0, cop1)));
17483 cop1 = CONST0_RTX (mode);
17489 gcc_unreachable ();
17494 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17495 operands[1+negate], operands[2-negate]);
17497 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17498 operands[2-negate]);
17502 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17503 true if we should do zero extension, else sign extension. HIGH_P is
17504 true if we want the N/2 high elements, else the low elements. */
17507 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17509 enum machine_mode imode = GET_MODE (operands[1]);
17510 rtx (*unpack)(rtx, rtx, rtx);
17517 unpack = gen_vec_interleave_highv16qi;
17519 unpack = gen_vec_interleave_lowv16qi;
17523 unpack = gen_vec_interleave_highv8hi;
17525 unpack = gen_vec_interleave_lowv8hi;
17529 unpack = gen_vec_interleave_highv4si;
17531 unpack = gen_vec_interleave_lowv4si;
17534 gcc_unreachable ();
17537 dest = gen_lowpart (imode, operands[0]);
17540 se = force_reg (imode, CONST0_RTX (imode));
17542 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17543 operands[1], pc_rtx, pc_rtx);
17545 emit_insn (unpack (dest, operands[1], se));
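/* Conceptually each source element X is widened to the pair (SE, X):
   SE is zero for zero extension, or the all-ones/zero sign replica
   produced by the 0 > X comparison for sign extension, so the
   interleave performs the actual widening.  */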
17548 /* This function performs the same task as ix86_expand_sse_unpack,
17549 but with SSE4.1 instructions. */
17552 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17554 enum machine_mode imode = GET_MODE (operands[1]);
17555 rtx (*unpack)(rtx, rtx);
17562 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17564 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17568 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17570 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17574 unpack = gen_sse4_1_zero_extendv2siv2di2;
17576 unpack = gen_sse4_1_sign_extendv2siv2di2;
17579 gcc_unreachable ();
17582 dest = operands[0];
17585 /* Shift higher 8 bytes to lower 8 bytes. */
17586 src = gen_reg_rtx (imode);
17587 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17588 gen_lowpart (V1TImode, operands[1]),
17594 emit_insn (unpack (dest, src));
17597 /* Expand conditional increment or decrement using adc/sbb instructions.
17598 The default case using setcc followed by the conditional move can be
17599 done by generic code. */
17601 ix86_expand_int_addcc (rtx operands[])
17603 enum rtx_code code = GET_CODE (operands[1]);
17605 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17607 rtx val = const0_rtx;
17608 bool fpcmp = false;
17609 enum machine_mode mode;
17610 rtx op0 = XEXP (operands[1], 0);
17611 rtx op1 = XEXP (operands[1], 1);
17613 if (operands[3] != const1_rtx
17614 && operands[3] != constm1_rtx)
17616 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17618 code = GET_CODE (compare_op);
17620 flags = XEXP (compare_op, 0);
17622 if (GET_MODE (flags) == CCFPmode
17623 || GET_MODE (flags) == CCFPUmode)
17626 code = ix86_fp_compare_code_to_integer (code);
17633 PUT_CODE (compare_op,
17634 reverse_condition_maybe_unordered
17635 (GET_CODE (compare_op)));
17637 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17640 mode = GET_MODE (operands[0]);
17642 /* Construct either adc or sbb insn. */
17643 if ((code == LTU) == (operands[3] == constm1_rtx))
17648 insn = gen_subqi3_carry;
17651 insn = gen_subhi3_carry;
17654 insn = gen_subsi3_carry;
17657 insn = gen_subdi3_carry;
17660 gcc_unreachable ();
17668 insn = gen_addqi3_carry;
17671 insn = gen_addhi3_carry;
17674 insn = gen_addsi3_carry;
17677 insn = gen_adddi3_carry;
17680 gcc_unreachable ();
17683 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
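/* Sketch of the result for, e.g., "r += (a < b)" with unsigned
   operands:
     cmpl  b, a    (AT&T order: sets CF iff a < b)
     adcl  $0, r   (r += CF)
   The sbb forms handle conditional decrement symmetrically.  */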
17689 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17690 works for floating point parameters and non-offsettable memories.
17691 For pushes, it returns just stack offsets; the values will be saved
17692 in the right order. At most four parts are generated. */
17695 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17700 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17702 size = (GET_MODE_SIZE (mode) + 4) / 8;
17704 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17705 gcc_assert (size >= 2 && size <= 4);
17707 /* Optimize constant pool reference to immediates. This is used by fp
17708 moves, which force all constants to memory to allow combining. */
17709 if (MEM_P (operand) && MEM_READONLY_P (operand))
17711 rtx tmp = maybe_get_pool_constant (operand);
17716 if (MEM_P (operand) && !offsettable_memref_p (operand))
17718 /* The only non-offsettable memories we handle are pushes. */
17719 int ok = push_operand (operand, VOIDmode);
17723 operand = copy_rtx (operand);
17724 PUT_MODE (operand, Pmode);
17725 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17729 if (GET_CODE (operand) == CONST_VECTOR)
17731 enum machine_mode imode = int_mode_for_mode (mode);
17732 /* Caution: if we looked through a constant pool memory above,
17733 the operand may actually have a different mode now. That's
17734 ok, since we want to pun this all the way back to an integer. */
17735 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17736 gcc_assert (operand != NULL);
17742 if (mode == DImode)
17743 split_di (&operand, 1, &parts[0], &parts[1]);
17748 if (REG_P (operand))
17750 gcc_assert (reload_completed);
17751 for (i = 0; i < size; i++)
17752 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17754 else if (offsettable_memref_p (operand))
17756 operand = adjust_address (operand, SImode, 0);
17757 parts[0] = operand;
17758 for (i = 1; i < size; i++)
17759 parts[i] = adjust_address (operand, SImode, 4 * i);
17761 else if (GET_CODE (operand) == CONST_DOUBLE)
17766 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17770 real_to_target (l, &r, mode);
17771 parts[3] = gen_int_mode (l[3], SImode);
17772 parts[2] = gen_int_mode (l[2], SImode);
17775 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17776 parts[2] = gen_int_mode (l[2], SImode);
17779 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17782 gcc_unreachable ();
17784 parts[1] = gen_int_mode (l[1], SImode);
17785 parts[0] = gen_int_mode (l[0], SImode);
17788 gcc_unreachable ();
17793 if (mode == TImode)
17794 split_ti (&operand, 1, &parts[0], &parts[1]);
17795 if (mode == XFmode || mode == TFmode)
17797 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
17798 if (REG_P (operand))
17800 gcc_assert (reload_completed);
17801 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17802 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17804 else if (offsettable_memref_p (operand))
17806 operand = adjust_address (operand, DImode, 0);
17807 parts[0] = operand;
17808 parts[1] = adjust_address (operand, upper_mode, 8);
17810 else if (GET_CODE (operand) == CONST_DOUBLE)
17815 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17816 real_to_target (l, &r, mode);
17818 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17819 if (HOST_BITS_PER_WIDE_INT >= 64)
17822 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17823 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17826 parts[0] = immed_double_const (l[0], l[1], DImode);
17828 if (upper_mode == SImode)
17829 parts[1] = gen_int_mode (l[2], SImode);
17830 else if (HOST_BITS_PER_WIDE_INT >= 64)
17833 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17834 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17837 parts[1] = immed_double_const (l[2], l[3], DImode);
17840 gcc_unreachable ();
17847 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17848 Return false when normal moves are needed; true when all required
17849 insns have been emitted. Operands 2-4 contain the input values
17850 in the correct order; operands 5-7 contain the output values. */
17853 ix86_split_long_move (rtx operands[])
17858 int collisions = 0;
17859 enum machine_mode mode = GET_MODE (operands[0]);
17860 bool collisionparts[4];
17862 /* The DFmode expanders may ask us to move a double.
17863 For a 64-bit target this is a single move. By hiding the fact
17864 here we simplify the i386.md splitters. */
17865 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17867 /* Optimize constant pool reference to immediates. This is used by
17868 fp moves, which force all constants to memory to allow combining. */
17870 if (MEM_P (operands[1])
17871 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17872 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17873 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17874 if (push_operand (operands[0], VOIDmode))
17876 operands[0] = copy_rtx (operands[0]);
17877 PUT_MODE (operands[0], Pmode);
17880 operands[0] = gen_lowpart (DImode, operands[0]);
17881 operands[1] = gen_lowpart (DImode, operands[1]);
17882 emit_move_insn (operands[0], operands[1]);
17886 /* The only non-offsettable memory we handle is push. */
17887 if (push_operand (operands[0], VOIDmode))
17890 gcc_assert (!MEM_P (operands[0])
17891 || offsettable_memref_p (operands[0]));
17893 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17894 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17896 /* When emitting push, take care for source operands on the stack. */
17897 if (push && MEM_P (operands[1])
17898 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17900 rtx src_base = XEXP (part[1][nparts - 1], 0);
17902 /* Compensate for the stack decrement by 4. */
17903 if (!TARGET_64BIT && nparts == 3
17904 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17905 src_base = plus_constant (src_base, 4);
17907 /* src_base refers to the stack pointer and is
17908 automatically decreased by emitted push. */
17909 for (i = 0; i < nparts; i++)
17910 part[1][i] = change_address (part[1][i],
17911 GET_MODE (part[1][i]), src_base);
17914 /* We need to do copy in the right order in case an address register
17915 of the source overlaps the destination. */
17916 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17920 for (i = 0; i < nparts; i++)
17923 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17924 if (collisionparts[i])
17928 /* Collision in the middle part can be handled by reordering. */
17929 if (collisions == 1 && nparts == 3 && collisionparts [1])
17931 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17932 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17934 else if (collisions == 1
17936 && (collisionparts [1] || collisionparts [2]))
17938 if (collisionparts [1])
17940 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17941 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17945 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17946 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17950 /* If there are more collisions, we can't handle it by reordering.
17951 Do an lea to the last part and use only one colliding move. */
17952 else if (collisions > 1)
17958 base = part[0][nparts - 1];
17960 /* Handle the case when the last part isn't valid for lea.
17961 Happens in 64-bit mode storing the 12-byte XFmode. */
17962 if (GET_MODE (base) != Pmode)
17963 base = gen_rtx_REG (Pmode, REGNO (base));
17965 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17966 part[1][0] = replace_equiv_address (part[1][0], base);
17967 for (i = 1; i < nparts; i++)
17969 tmp = plus_constant (base, UNITS_PER_WORD * i);
17970 part[1][i] = replace_equiv_address (part[1][i], tmp);
17981 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17982 emit_insn (gen_addsi3 (stack_pointer_rtx,
17983 stack_pointer_rtx, GEN_INT (-4)));
17984 emit_move_insn (part[0][2], part[1][2]);
17986 else if (nparts == 4)
17988 emit_move_insn (part[0][3], part[1][3]);
17989 emit_move_insn (part[0][2], part[1][2]);
17994 /* In 64bit mode we don't have 32bit push available. In case this is
17995 register, it is OK - we will just use larger counterpart. We also
17996 retype memory - these comes from attempt to avoid REX prefix on
17997 moving of second half of TFmode value. */
17998 if (GET_MODE (part[1][1]) == SImode)
18000 switch (GET_CODE (part[1][1]))
18003 part[1][1] = adjust_address (part[1][1], DImode, 0);
18007 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18011 gcc_unreachable ();
18014 if (GET_MODE (part[1][0]) == SImode)
18015 part[1][0] = part[1][1];
18018 emit_move_insn (part[0][1], part[1][1]);
18019 emit_move_insn (part[0][0], part[1][0]);
18023 /* Choose correct order to not overwrite the source before it is copied. */
18024 if ((REG_P (part[0][0])
18025 && REG_P (part[1][1])
18026 && (REGNO (part[0][0]) == REGNO (part[1][1])
18028 && REGNO (part[0][0]) == REGNO (part[1][2]))
18030 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18032 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18034 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18036 operands[2 + i] = part[0][j];
18037 operands[6 + i] = part[1][j];
18042 for (i = 0; i < nparts; i++)
18044 operands[2 + i] = part[0][i];
18045 operands[6 + i] = part[1][i];
18049 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18050 if (optimize_insn_for_size_p ())
18052 for (j = 0; j < nparts - 1; j++)
18053 if (CONST_INT_P (operands[6 + j])
18054 && operands[6 + j] != const0_rtx
18055 && REG_P (operands[2 + j]))
18056 for (i = j; i < nparts - 1; i++)
18057 if (CONST_INT_P (operands[7 + i])
18058 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18059 operands[7 + i] = operands[2 + j];
18062 for (i = 0; i < nparts; i++)
18063 emit_move_insn (operands[2 + i], operands[6 + i]);
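/* Example (illustrative; actual registers depend on allocation): for a
   32-bit DImode load whose low destination word is also the address
   register,

       (set (reg:DI 0 ax) (mem:DI (reg:SI 0 ax)))

   the collision ordering above emits the high word first,

       movl 4(%eax), %edx
       movl (%eax), %eax

   so the address is not clobbered before its last use.  */
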
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  if (count == 1)
    {
      emit_insn ((mode == DImode
		  ? gen_addsi3
		  : gen_adddi3) (operand, operand, operand));
    }
  else if (!optimize_insn_for_size_p ()
	   && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      int i;
      for (i = 0; i < count; i++)
	{
	  emit_insn ((mode == DImode
		      ? gen_addsi3
		      : gen_adddi3) (operand, operand, operand));
	}
    }
  else
    emit_insn ((mode == DImode
		? gen_ashlsi3
		: gen_ashldi3) (operand, operand, GEN_INT (count)));
}

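/* Example (illustrative): ix86_expand_ashl_const (low, 2, DImode), applied
   to one half of a split DImode shift, prefers

       addl %eax, %eax
       addl %eax, %eax

   over "sall $2, %eax" whenever 2 * add cost <= constant-shift cost and we
   are not optimizing for size.  Note that MODE is the mode of the original
   double-word operand, so the emitted insns work on the half-width parts.  */
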
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > single_width)
	    ix86_expand_ashl_const (high[0], count - single_width, mode);
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shld
		      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode,
				     operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0],
				      GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], const1_rtx));
	}

      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode
	      ? gen_ashlsi3
	      : gen_ashldi3) (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shiftsi_adj_1
		  : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
					    scratch));
    }
  else
    emit_insn ((mode == DImode
		? gen_x86_shiftsi_adj_2
		: gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
}

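/* Example (illustrative sketch of the generic non-constant path above for
   a 32-bit DImode shift, count in %ecx):

       shldl %cl, %eax, %edx	; high:low double-shift by cl & 31
       sall  %cl, %eax		; low <<= cl & 31

   after which x86_shiftsi_adj_{1,2} fix up the cl & 32 case by moving low
   into high and clearing low, using a cmov or a branch respectively.  */
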
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shiftsi_adj_1
		      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
						scratch));
	}
      else
	emit_insn ((mode == DImode
		    ? gen_x86_shiftsi_adj_3
		    : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
    }
}

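/* Example (illustrative; assuming the value is already in %edx:%eax): a
   DImode arithmetic shift right by 63 on 32-bit needs no shrd at all; the
   constant path above broadcasts the sign bit instead:

       sarl $31, %edx		; high = 0 or -1
       movl %edx, %eax		; low  = the same sign mask  */
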
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shiftsi_adj_1
		      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
						scratch));
	}
      else
	emit_insn ((mode == DImode
		    ? gen_x86_shiftsi_adj_2
		    : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
    }
}

/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}

/* Helper function for the string operations below.  Test VARIABLE against
   the alignment mask VALUE and jump to the returned label when
   (VARIABLE & VALUE) is zero.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

/* Decrease COUNTREG by VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero-extend the possibly-SImode EXP to a Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    gcc_unreachable ();
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}

/* When SRCPTR is non-NULL, output a simple loop to copy memory from SRCPTR
   to DESTPTR via chunks of MODE, unrolled UNROLL times; the overall size is
   COUNT, specified in bytes.  When SRCPTR is NULL, output the equivalent
   loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.
	 Also using 4 temporaries is overkill in 32-bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode,
				    GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode,
				    GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode,
				  GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode,
				  GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2)
			/ expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}

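/* Illustrative shape of the loop emitted above (MODE == SImode,
   UNROLL == 2, copy case; pointer bookkeeping written as C):

       size = count & ~7;
       iter = 0;
     top:
       dest[iter] = src[iter];
       dest[iter + 4] = src[iter + 4];
       iter += 8;
       if (iter < size) goto top;
       destptr += iter;
       srcptr += iter;
     out:

   The remaining count & 7 bytes are left for the epilogue code.  */
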
18598 /* Output "rep; mov" instruction.
18599 Arguments have same meaning as for previous function */
18601 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18602 rtx destptr, rtx srcptr,
18604 enum machine_mode mode)
18610 /* If the size is known, it is shorter to use rep movs. */
18611 if (mode == QImode && CONST_INT_P (count)
18612 && !(INTVAL (count) & 3))
18615 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18616 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18617 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18618 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18619 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18620 if (mode != QImode)
18622 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18623 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18624 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18625 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18626 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18627 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18631 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18632 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18634 if (CONST_INT_P (count))
18636 count = GEN_INT (INTVAL (count)
18637 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18638 destmem = shallow_copy_rtx (destmem);
18639 srcmem = shallow_copy_rtx (srcmem);
18640 set_mem_size (destmem, count);
18641 set_mem_size (srcmem, count);
18645 if (MEM_SIZE (destmem))
18646 set_mem_size (destmem, NULL_RTX);
18647 if (MEM_SIZE (srcmem))
18648 set_mem_size (srcmem, NULL_RTX);
18650 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
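/* Example (illustrative): copying a known 64 bytes with MODE == SImode
   emits

       movl $16, %ecx
       rep movsl

   while DESTEXP and SRCEXP describe the final pointer values
   (destptr + countreg * 4 and srcptr + countreg * 4), keeping the side
   effects of the string instruction visible at the RTL level.  */
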
18654 /* Output "rep; stos" instruction.
18655 Arguments have same meaning as for previous function */
18657 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18658 rtx count, enum machine_mode mode,
18664 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18665 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18666 value = force_reg (mode, gen_lowpart (mode, value));
18667 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18668 if (mode != QImode)
18670 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18671 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18672 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18675 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18676 if (orig_value == const0_rtx && CONST_INT_P (count))
18678 count = GEN_INT (INTVAL (count)
18679 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18680 destmem = shallow_copy_rtx (destmem);
18681 set_mem_size (destmem, count);
18683 else if (MEM_SIZE (destmem))
18684 set_mem_size (destmem, NULL_RTX);
18685 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}

/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode,
			   offset + 8);
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode,
			   offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
				   GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When single-instruction stringops are available, we can cheaply
     increase dest and src pointers.  Otherwise we save code size by
     maintaining an offset (zero is readily available from the preceding
     rep operation) and using x86 addressing modes.  */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}

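/* Example (illustrative): a constant tail of 7 bytes expands into
   branch-free straight-line code: a 4-byte move at offset 0, a 2-byte
   move at offset 4 and a 1-byte move at offset 6, one move per set bit
   of the count (0x4 | 0x2 | 0x1).  */
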
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}

/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
			int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
				       max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}

/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned to
   ALIGN bytes, to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx src_size, dst_size;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src,
					      desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  src_size = MEM_SIZE (src);
  dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
  *srcp = src;
  return dst;
}

/* Store enough bytes into DEST to align DEST, known to be aligned to ALIGN
   bytes, to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

/* Store enough bytes into DST to align it to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  return dst;
}

/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
    return stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most of that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
	  || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}

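/* Example (illustrative; the numbers are made up): given a size table

     {{24, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}

   an expected size of 100 picks rep_prefix_4_byte: the first entry whose
   max covers the expected size (or is -1) supplies the algorithm, provided
   it is usable under the current fixed-register constraints.  */
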
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align, enum stringop_alg alg, int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying a whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = alg == rep_prefix_4_byte ? 4 : 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}

/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  ix86_expand_setmem contains similar code.  The code depends
   upon architecture, block size and alignment, but always has the same
   overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by the epilogue alone.  This is faster but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.

      An optional dynamic check for size and a libcall for large blocks is
      emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
      We emit either a jump tree on power of two sized blocks, or a byte
      loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to size guarded by the prologue
      guard).  */
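
/* Illustrative shape of the result (assuming rep_prefix_4_byte was chosen
   for an unknown count; registers are schematic, not the actual
   allocation):

       cmpl  $4, %ecx		; 1) prologue guard: tiny block?
       jb    .Lepilogue
       ...			; 2) byte copies until %edi is aligned
       movl  %ecx, %edx
       shrl  $2, %ecx		; 3) main body
       rep movsl
     .Lepilogue:
       andl  $3, %edx		; 4) epilogue handles the tail bytes
       ...  */
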
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care
     here just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return 0;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return 0;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that the alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32-bit mode, since we don't have
	 enough registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Properly adjust the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
}

/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a multiplication by 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	{
	  if (mode == SImode)
	    emit_insn (gen_movsi_insv_1 (reg, reg));
	  else
	    emit_insn (gen_movdi_insv_1 (reg, reg));
	}
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}

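#if 0
/* Host-side sketch of the replication done above for the constant case;
   kept disabled, for exposition only.  The name is illustrative.  */
static unsigned int
replicate_byte_sketch (unsigned int v)
{
  v &= 255;
  v |= v << 8;		/* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;		/* 0x0000XYXY -> 0xXYXYXYXY */
  return v;
}
#endif
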
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
				int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}

19782 /* Expand string clear operation (bzero). Use i386 string operations when
19783 profitable. See expand_movmem comment for explanation of individual
19784 steps performed. */
19786 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19787 rtx expected_align_exp, rtx expected_size_exp)
19792 rtx jump_around_label = NULL;
19793 HOST_WIDE_INT align = 1;
19794 unsigned HOST_WIDE_INT count = 0;
19795 HOST_WIDE_INT expected_size = -1;
19796 int size_needed = 0, epilogue_size_needed;
19797 int desired_align = 0, align_bytes = 0;
19798 enum stringop_alg alg;
19799 rtx promoted_val = NULL;
19800 bool force_loopy_epilogue = false;
19802 bool need_zero_guard = false;
19804 if (CONST_INT_P (align_exp))
19805 align = INTVAL (align_exp);
19806 /* i386 can do misaligned access on reasonably increased cost. */
19807 if (CONST_INT_P (expected_align_exp)
19808 && INTVAL (expected_align_exp) > align)
19809 align = INTVAL (expected_align_exp);
19810 if (CONST_INT_P (count_exp))
19811 count = expected_size = INTVAL (count_exp);
19812 if (CONST_INT_P (expected_size_exp) && count == 0)
19813 expected_size = INTVAL (expected_size_exp);
19815 /* Make sure we don't need to care about overflow later on. */
19816 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19819 /* Step 0: Decide on preferred algorithm, desired alignment and
19820 size of chunks to be copied by main loop. */
19822 alg = decide_alg (count, expected_size, true, &dynamic_check);
19823 desired_align = decide_alignment (align, alg, expected_size);
19825 if (!TARGET_ALIGN_STRINGOPS)
19826 align = desired_align;
19828 if (alg == libcall)
19830 gcc_assert (alg != no_stringop);
19832 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19833 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19838 gcc_unreachable ();
19840 need_zero_guard = true;
19841 size_needed = GET_MODE_SIZE (Pmode);
19843 case unrolled_loop:
19844 need_zero_guard = true;
19845 size_needed = GET_MODE_SIZE (Pmode) * 4;
19847 case rep_prefix_8_byte:
19850 case rep_prefix_4_byte:
19853 case rep_prefix_1_byte:
19857 need_zero_guard = true;
19861 epilogue_size_needed = size_needed;
19863 /* Step 1: Prologue guard. */
19865 /* Alignment code needs count to be in register. */
19866 if (CONST_INT_P (count_exp) && desired_align > align)
19868 if (INTVAL (count_exp) > desired_align
19869 && INTVAL (count_exp) > size_needed)
19872 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19873 if (align_bytes <= 0)
19876 align_bytes = desired_align - align_bytes;
19878 if (align_bytes == 0)
19880 enum machine_mode mode = SImode;
19881 if (TARGET_64BIT && (count & ~0xffffffff))
19883 count_exp = force_reg (mode, count_exp);
19886 /* Do the cheap promotion to allow better CSE across the
19887 main loop and epilogue (i.e. one load of the big constant in
19888 front of all code). */
19889 if (CONST_INT_P (val_exp))
19890 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19891 desired_align, align);
19892 /* Ensure that alignment prologue won't copy past end of block. */
19893 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19895 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19896 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19897 Make sure it is a power of 2. */
19898 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
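/* Worked example: with size_needed == 16, desired_align == 16 and
   align == 4, MAX (15, 12) == 15 is rounded up to 16, so the epilogue
   handles COUNT_EXP & 15 trailing bytes.  */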
19900 /* To improve performance of small blocks, we jump around the VAL
19901 promoting code. This means that if the promoted VAL is not constant,
19902 we might not use it in the epilogue and have to use the byte loop variant. */
19904 if (epilogue_size_needed > 2 && !promoted_val)
19905 force_loopy_epilogue = true;
19908 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19910 /* If main algorithm works on QImode, no epilogue is needed.
19911 For small sizes just don't align anything. */
19912 if (size_needed == 1)
19913 desired_align = align;
19920 label = gen_label_rtx ();
19921 emit_cmp_and_jump_insns (count_exp,
19922 GEN_INT (epilogue_size_needed),
19923 LTU, 0, counter_mode (count_exp), 1, label);
19924 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19925 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19927 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19930 if (dynamic_check != -1)
19932 rtx hot_label = gen_label_rtx ();
19933 jump_around_label = gen_label_rtx ();
19934 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19935 LEU, 0, counter_mode (count_exp), 1, hot_label);
19936 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19937 set_storage_via_libcall (dst, count_exp, val_exp, false);
19938 emit_jump (jump_around_label);
19939 emit_label (hot_label);
19942 /* Step 2: Alignment prologue. */
19944 /* Do the expensive promotion once we have branched off the small blocks. */
19946 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19947 desired_align, align);
19948 gcc_assert (desired_align >= 1 && align >= 1);
19950 if (desired_align > align)
19952 if (align_bytes == 0)
19954 /* Except for the first move in the epilogue, we no longer know
19955 the constant offset in aliasing info. It doesn't seem worth
19956 the pain to maintain it for the first move, so throw away the info early. */
19958 dst = change_address (dst, BLKmode, destreg);
19959 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19964 /* If we know how many bytes need to be stored before dst is
19965 sufficiently aligned, maintain aliasing info accurately. */
19966 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19967 desired_align, align_bytes);
19968 count_exp = plus_constant (count_exp, -align_bytes);
19969 count -= align_bytes;
19971 if (need_zero_guard
19972 && (count < (unsigned HOST_WIDE_INT) size_needed
19973 || (align_bytes == 0
19974 && count < ((unsigned HOST_WIDE_INT) size_needed
19975 + desired_align - align))))
19977 /* It is possible that we copied enough so that the main loop will not execute. */
19979 gcc_assert (size_needed > 1);
19980 if (label == NULL_RTX)
19981 label = gen_label_rtx ();
19982 emit_cmp_and_jump_insns (count_exp,
19983 GEN_INT (size_needed),
19984 LTU, 0, counter_mode (count_exp), 1, label);
19985 if (expected_size == -1
19986 || expected_size < (desired_align - align) / 2 + size_needed)
19987 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19989 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19992 if (label && size_needed == 1)
19994 emit_label (label);
19995 LABEL_NUSES (label) = 1;
19997 promoted_val = val_exp;
19998 epilogue_size_needed = 1;
20000 else if (label == NULL_RTX)
20001 epilogue_size_needed = size_needed;
20003 /* Step 3: Main loop. */
20009 gcc_unreachable ();
20011 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20012 count_exp, QImode, 1, expected_size);
20015 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20016 count_exp, Pmode, 1, expected_size);
20018 case unrolled_loop:
20019 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20020 count_exp, Pmode, 4, expected_size);
20022 case rep_prefix_8_byte:
20023 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20026 case rep_prefix_4_byte:
20027 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20030 case rep_prefix_1_byte:
20031 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20035 /* Properly adjust the offset of the destination memory for aliasing. */
20036 if (CONST_INT_P (count_exp))
20037 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20038 (count / size_needed) * size_needed);
20040 dst = change_address (dst, BLKmode, destreg);
20042 /* Step 4: Epilogue to copy the remaining bytes. */
20046 /* When the main loop is done, COUNT_EXP might hold the original count,
20047 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20048 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20049 bytes. Compensate if needed. */
20051 if (size_needed < epilogue_size_needed)
20054 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20055 GEN_INT (size_needed - 1), count_exp, 1,
20057 if (tmp != count_exp)
20058 emit_move_insn (count_exp, tmp);
20060 emit_label (label);
20061 LABEL_NUSES (label) = 1;
20064 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20066 if (force_loopy_epilogue)
20067 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20068 epilogue_size_needed);
20070 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20071 epilogue_size_needed);
20073 if (jump_around_label)
20074 emit_label (jump_around_label);
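/* For illustration, the code emitted above for memset (p, c, n) with
   alg == rep_prefix_4_byte on ia32 has roughly this shape:

	eax = c * 0x01010101		; promoted fill value
	<align p, adjusting n>		; alignment prologue
	ecx = n >> 2; rep stosl		; main loop
	<store n & 3 trailing bytes>	; epilogue  */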
20078 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
20081 out = result, initialized with the start address
20082 align_rtx = alignment of the address.
20083 scratch = scratch register, initialized with the start address when
20084 not aligned, otherwise undefined
20086 This is just the body. It needs the initializations mentioned above and
20087 some address computing at the end. These things are done in i386.md. */
20090 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20094 rtx align_2_label = NULL_RTX;
20095 rtx align_3_label = NULL_RTX;
20096 rtx align_4_label = gen_label_rtx ();
20097 rtx end_0_label = gen_label_rtx ();
20099 rtx tmpreg = gen_reg_rtx (SImode);
20100 rtx scratch = gen_reg_rtx (SImode);
20104 if (CONST_INT_P (align_rtx))
20105 align = INTVAL (align_rtx);
20107 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20109 /* Is there a known alignment and is it less than 4? */
20112 rtx scratch1 = gen_reg_rtx (Pmode);
20113 emit_move_insn (scratch1, out);
20114 /* Is there a known alignment and is it not 2? */
20117 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20118 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20120 /* Leave just the 3 lower bits. */
20121 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20122 NULL_RTX, 0, OPTAB_WIDEN);
20124 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20125 Pmode, 1, align_4_label);
20126 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20127 Pmode, 1, align_2_label);
20128 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20129 Pmode, 1, align_3_label);
20133 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20134 check whether it is aligned to a 4-byte boundary. */
20136 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20137 NULL_RTX, 0, OPTAB_WIDEN);
20139 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20140 Pmode, 1, align_4_label);
20143 mem = change_address (src, QImode, out);
20145 /* Now compare the bytes. */
20147 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
20148 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20149 QImode, 1, end_0_label);
20151 /* Increment the address. */
20152 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20154 /* Not needed with an alignment of 2 */
20157 emit_label (align_2_label);
20159 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20162 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20164 emit_label (align_3_label);
20167 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20170 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20173 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
20174 align this loop: it only makes programs larger and does not help them converge. */
20176 emit_label (align_4_label);
20178 mem = change_address (src, SImode, out);
20179 emit_move_insn (scratch, mem);
20180 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20182 /* This formula yields a nonzero result iff one of the bytes is zero.
20183 This saves three branches inside the loop and many cycles. */
20185 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20186 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20187 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20188 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20189 gen_int_mode (0x80808080, SImode)));
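/* Worked example of the formula on the 32-bit word x == 0x41004141,
   which contains one zero byte:
	x - 0x01010101		== 0x3fff4040
	(x - 0x01010101) & ~x	== 0x3eff0000
	... & 0x80808080	== 0x00800000   (nonzero: a zero byte)
   For x == 0x41414141 every byte of (x - 0x01010101) & ~x has its top
   bit clear, so the result is 0 and the loop continues.  */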
20190 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20195 rtx reg = gen_reg_rtx (SImode);
20196 rtx reg2 = gen_reg_rtx (Pmode);
20197 emit_move_insn (reg, tmpreg);
20198 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20200 /* If zero is not in the first two bytes, move two bytes forward. */
20201 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20202 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20203 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20204 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20205 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20208 /* Emit lea manually to avoid clobbering of flags. */
20209 emit_insn (gen_rtx_SET (SImode, reg2,
20210 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20212 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20213 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20214 emit_insn (gen_rtx_SET (VOIDmode, out,
20215 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20221 rtx end_2_label = gen_label_rtx ();
20222 /* Is zero in the first two bytes? */
20224 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20225 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20226 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20227 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20228 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20230 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20231 JUMP_LABEL (tmp) = end_2_label;
20233 /* Not in the first two. Move two bytes forward. */
20234 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20235 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20237 emit_label (end_2_label);
20241 /* Avoid branch in fixing the byte. */
20242 tmpreg = gen_lowpart (QImode, tmpreg);
20243 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20244 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20245 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20246 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20248 emit_label (end_0_label);
20251 /* Expand strlen. */
20254 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20256 rtx addr, scratch1, scratch2, scratch3, scratch4;
20258 /* The generic case of the strlen expander is long. Avoid expanding it
20259 unless TARGET_INLINE_ALL_STRINGOPS. */
20261 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20262 && !TARGET_INLINE_ALL_STRINGOPS
20263 && !optimize_insn_for_size_p ()
20264 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20267 addr = force_reg (Pmode, XEXP (src, 0));
20268 scratch1 = gen_reg_rtx (Pmode);
20270 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20271 && !optimize_insn_for_size_p ())
20273 /* It seems that some optimizers do not combine a call like
20274 foo(strlen(bar), strlen(bar));
20275 when the move and the subtraction are done here. They do calculate
20276 the length just once when these instructions are done inside of
20277 output_strlen_unroll(). But since &bar[strlen(bar)] is
20278 often used, and this uses one fewer register for the lifetime of
20279 output_strlen_unroll(), this is better. */
20281 emit_move_insn (out, addr);
20283 ix86_expand_strlensi_unroll_1 (out, src, align);
20285 /* strlensi_unroll_1 returns the address of the zero at the end of
20286 the string, like memchr(), so compute the length by subtracting
20287 the start address. */
20288 emit_insn (ix86_gen_sub3 (out, out, addr));
20294 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20295 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20298 scratch2 = gen_reg_rtx (Pmode);
20299 scratch3 = gen_reg_rtx (Pmode);
20300 scratch4 = force_reg (Pmode, constm1_rtx);
20302 emit_move_insn (scratch3, addr);
20303 eoschar = force_reg (QImode, eoschar);
20305 src = replace_equiv_address_nv (src, scratch3);
20307 /* If .md starts supporting :P, this can be done in .md. */
20308 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20309 scratch4), UNSPEC_SCAS);
20310 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20311 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20312 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
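/* Illustrative arithmetic (not emitted as such): repnz scasb starts
   with ecx == -1 and decrements it once per byte scanned, including
   the terminating zero.  For "abc" it scans 4 bytes, leaving
   ecx == -5 == 0xfffffffb; then ~ecx == 4 and ~ecx - 1 == 3, the
   string length, which is what the not/add -1 pair above computes.  */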
20317 /* For a given symbol (function), construct code to compute the address of its
20318 PLT entry in the large x86-64 PIC model. */
20320 construct_plt_address (rtx symbol)
20322 rtx tmp = gen_reg_rtx (Pmode);
20323 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20325 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20326 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20328 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20329 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
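/* The two insns above amount to something like
	movabs	$symbol@PLTOFF, %tmp
	add	<PIC register>, %tmp
   i.e. TMP = _GLOBAL_OFFSET_TABLE_ + symbol@PLTOFF, the absolute
   address of SYMBOL's PLT entry (per the psABI, @PLTOFF is the PLT
   entry's offset from the GOT base).  */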
20334 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20336 rtx pop, int sibcall)
20338 rtx use = NULL, call;
20340 if (pop == const0_rtx)
20342 gcc_assert (!TARGET_64BIT || !pop);
20344 if (TARGET_MACHO && !TARGET_64BIT)
20347 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20348 fnaddr = machopic_indirect_call_target (fnaddr);
20353 /* Static functions and indirect calls don't need the pic register. */
20354 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20355 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20356 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20357 use_reg (&use, pic_offset_table_rtx);
20360 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20362 rtx al = gen_rtx_REG (QImode, AX_REG);
20363 emit_move_insn (al, callarg2);
20364 use_reg (&use, al);
20367 if (ix86_cmodel == CM_LARGE_PIC
20369 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20370 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20371 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20373 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20374 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20376 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20377 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20380 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20382 call = gen_rtx_SET (VOIDmode, retval, call);
20385 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20386 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20387 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20390 && ix86_cfun_abi () == MS_ABI
20391 && (!callarg2 || INTVAL (callarg2) != -2))
20393 /* We need to represent that the SI and DI registers are clobbered by the function. */
20395 static int clobbered_registers[] = {
20396 XMM6_REG, XMM7_REG, XMM8_REG,
20397 XMM9_REG, XMM10_REG, XMM11_REG,
20398 XMM12_REG, XMM13_REG, XMM14_REG,
20399 XMM15_REG, SI_REG, DI_REG
20402 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20403 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20404 UNSPEC_MS_TO_SYSV_CALL);
20408 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20409 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20412 (SSE_REGNO_P (clobbered_registers[i])
20414 clobbered_registers[i]));
20416 call = gen_rtx_PARALLEL (VOIDmode,
20417 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20421 call = emit_call_insn (call);
20423 CALL_INSN_FUNCTION_USAGE (call) = use;
20427 /* Clear stack slot assignments remembered from previous functions.
20428 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
20431 static struct machine_function *
20432 ix86_init_machine_status (void)
20434 struct machine_function *f;
20436 f = ggc_alloc_cleared_machine_function ();
20437 f->use_fast_prologue_epilogue_nregs = -1;
20438 f->tls_descriptor_call_expanded_p = 0;
20439 f->call_abi = ix86_abi;
20444 /* Return a MEM corresponding to a stack slot with mode MODE.
20445 Allocate a new slot if necessary.
20447 The RTL for a function can have several slots available: N is
20448 which slot to use. */
20451 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20453 struct stack_local_entry *s;
20455 gcc_assert (n < MAX_386_STACK_LOCALS);
20457 /* Virtual slot is valid only before vregs are instantiated. */
20458 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20460 for (s = ix86_stack_locals; s; s = s->next)
20461 if (s->mode == mode && s->n == n)
20462 return copy_rtx (s->rtl);
20464 s = ggc_alloc_stack_local_entry ();
20467 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20469 s->next = ix86_stack_locals;
20470 ix86_stack_locals = s;
20474 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20476 static GTY(()) rtx ix86_tls_symbol;
20478 ix86_tls_get_addr (void)
20481 if (!ix86_tls_symbol)
20483 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20484 (TARGET_ANY_GNU_TLS
20486 ? "___tls_get_addr"
20487 : "__tls_get_addr");
20490 return ix86_tls_symbol;
20493 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20495 static GTY(()) rtx ix86_tls_module_base_symbol;
20497 ix86_tls_module_base (void)
20500 if (!ix86_tls_module_base_symbol)
20502 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20503 "_TLS_MODULE_BASE_");
20504 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20505 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20508 return ix86_tls_module_base_symbol;
20511 /* Calculate the length of the memory address in the instruction
20512 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20515 memory_address_length (rtx addr)
20517 struct ix86_address parts;
20518 rtx base, index, disp;
20522 if (GET_CODE (addr) == PRE_DEC
20523 || GET_CODE (addr) == POST_INC
20524 || GET_CODE (addr) == PRE_MODIFY
20525 || GET_CODE (addr) == POST_MODIFY)
20528 ok = ix86_decompose_address (addr, &parts);
20531 if (parts.base && GET_CODE (parts.base) == SUBREG)
20532 parts.base = SUBREG_REG (parts.base);
20533 if (parts.index && GET_CODE (parts.index) == SUBREG)
20534 parts.index = SUBREG_REG (parts.index);
20537 index = parts.index;
20542 /* Rule of thumb: - esp as the base always wants an index,
20543 - ebp as the base always wants a displacement,
20544 - r12 as the base always wants an index,
20545 - r13 as the base always wants a displacement. */
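/* For instance, mov (%esp), %eax must be encoded as 8b 04 24 (a SIB
   byte with no index), and mov (%ebp), %eax as 8b 45 00 (a zero
   disp8), since neither register has a one-byte disp-less form; the
   length adjustments in this function account for those extra bytes.  */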
20547 /* Register Indirect. */
20548 if (base && !index && !disp)
20550 /* esp (for its index) and ebp (for its displacement) need
20551 the two-byte modrm form. Similarly for r12 and r13 in 64-bit mode. */
20554 && (addr == arg_pointer_rtx
20555 || addr == frame_pointer_rtx
20556 || REGNO (addr) == SP_REG
20557 || REGNO (addr) == BP_REG
20558 || REGNO (addr) == R12_REG
20559 || REGNO (addr) == R13_REG))
20563 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20564 is not disp32, but disp32(%rip), so for disp32
20565 SIB byte is needed, unless print_operand_address
20566 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC. */
20568 else if (disp && !base && !index)
20575 if (GET_CODE (disp) == CONST)
20576 symbol = XEXP (disp, 0);
20577 if (GET_CODE (symbol) == PLUS
20578 && CONST_INT_P (XEXP (symbol, 1)))
20579 symbol = XEXP (symbol, 0);
20581 if (GET_CODE (symbol) != LABEL_REF
20582 && (GET_CODE (symbol) != SYMBOL_REF
20583 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20584 && (GET_CODE (symbol) != UNSPEC
20585 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20586 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20593 /* Find the length of the displacement constant. */
20596 if (base && satisfies_constraint_K (disp))
20601 /* ebp always wants a displacement. Similarly r13. */
20602 else if (base && REG_P (base)
20603 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20606 /* An index requires the two-byte modrm form.... */
20608 /* ...like esp (or r12), which always wants an index. */
20609 || base == arg_pointer_rtx
20610 || base == frame_pointer_rtx
20611 || (base && REG_P (base)
20612 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20629 /* Compute default value for "length_immediate" attribute. When SHORTFORM
20630 is set, expect that the insn has an 8-bit immediate alternative. */
20632 ix86_attr_length_immediate_default (rtx insn, int shortform)
20636 extract_insn_cached (insn);
20637 for (i = recog_data.n_operands - 1; i >= 0; --i)
20638 if (CONSTANT_P (recog_data.operand[i]))
20640 enum attr_mode mode = get_attr_mode (insn);
20643 if (shortform && CONST_INT_P (recog_data.operand[i]))
20645 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20652 ival = trunc_int_for_mode (ival, HImode);
20655 ival = trunc_int_for_mode (ival, SImode);
20660 if (IN_RANGE (ival, -128, 127))
20677 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
20682 fatal_insn ("unknown insn mode", insn);
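/* Example of the short form: in "add $100, %eax" the immediate fits a
   sign-extended 8-bit field and contributes 1 byte to the length,
   while "add $300, %eax" needs a full 32-bit immediate and
   contributes 4.  */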
20687 /* Compute default value for "length_address" attribute. */
20689 ix86_attr_length_address_default (rtx insn)
20693 if (get_attr_type (insn) == TYPE_LEA)
20695 rtx set = PATTERN (insn), addr;
20697 if (GET_CODE (set) == PARALLEL)
20698 set = XVECEXP (set, 0, 0);
20700 gcc_assert (GET_CODE (set) == SET);
20702 addr = SET_SRC (set);
20703 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20705 if (GET_CODE (addr) == ZERO_EXTEND)
20706 addr = XEXP (addr, 0);
20707 if (GET_CODE (addr) == SUBREG)
20708 addr = SUBREG_REG (addr);
20711 return memory_address_length (addr);
20714 extract_insn_cached (insn);
20715 for (i = recog_data.n_operands - 1; i >= 0; --i)
20716 if (MEM_P (recog_data.operand[i]))
20718 constrain_operands_cached (reload_completed);
20719 if (which_alternative != -1)
20721 const char *constraints = recog_data.constraints[i];
20722 int alt = which_alternative;
20724 while (*constraints == '=' || *constraints == '+')
20727 while (*constraints++ != ',')
20729 /* Skip ignored operands. */
20730 if (*constraints == 'X')
20733 return memory_address_length (XEXP (recog_data.operand[i], 0));
20738 /* Compute default value for "length_vex" attribute. It includes
20739 the 2- or 3-byte VEX prefix and 1 opcode byte. */
20742 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode, int has_vex_w)
20747 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
20748 the 3-byte VEX prefix. */
20749 if (!has_0f_opcode || has_vex_w)
20752 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
20756 extract_insn_cached (insn);
20758 for (i = recog_data.n_operands - 1; i >= 0; --i)
20759 if (REG_P (recog_data.operand[i]))
20761 /* The REX.W bit requires the 3-byte VEX prefix. */
20762 if (GET_MODE (recog_data.operand[i]) == DImode
20763 && GENERAL_REG_P (recog_data.operand[i]))
20768 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
20769 if (MEM_P (recog_data.operand[i])
20770 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
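/* E.g. vaddps %xmm1, %xmm2, %xmm3 fits the 2-byte (c5) VEX prefix,
   while vaddps (%rax,%r12), %xmm2, %xmm3 needs REX.X for the extended
   index register and therefore the 3-byte (c4) form.  */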
20777 /* Return the maximum number of instructions a CPU can issue. */
20780 ix86_issue_rate (void)
20784 case PROCESSOR_PENTIUM:
20785 case PROCESSOR_ATOM:
20789 case PROCESSOR_PENTIUMPRO:
20790 case PROCESSOR_PENTIUM4:
20791 case PROCESSOR_ATHLON:
20793 case PROCESSOR_AMDFAM10:
20794 case PROCESSOR_NOCONA:
20795 case PROCESSOR_GENERIC32:
20796 case PROCESSOR_GENERIC64:
20797 case PROCESSOR_BDVER1:
20800 case PROCESSOR_CORE2:
20808 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
20809 by DEP_INSN and nothing set by DEP_INSN. */
20812 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20816 /* Simplify the test for uninteresting insns. */
20817 if (insn_type != TYPE_SETCC
20818 && insn_type != TYPE_ICMOV
20819 && insn_type != TYPE_FCMOV
20820 && insn_type != TYPE_IBR)
20823 if ((set = single_set (dep_insn)) != 0)
20825 set = SET_DEST (set);
20828 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20829 && XVECLEN (PATTERN (dep_insn), 0) == 2
20830 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20831 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20833 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20834 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20839 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20842 /* This test is true if the dependent insn reads the flags but
20843 not any other potentially set register. */
20844 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20847 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20853 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
20857 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20860 extract_insn_cached (use_insn);
20861 for (i = recog_data.n_operands - 1; i >= 0; --i)
20862 if (MEM_P (recog_data.operand[i]))
20864 rtx addr = XEXP (recog_data.operand[i], 0);
20865 return modified_in_p (addr, set_insn) != 0;
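/* Classic AGI example on Pentium:
	add	$4, %esi
	mov	(%esi), %eax
   The load's address depends on the immediately preceding insn, which
   costs an extra cycle; ix86_adjust_cost models this below.  */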
20871 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20873 enum attr_type insn_type, dep_insn_type;
20874 enum attr_memory memory;
20876 int dep_insn_code_number;
20878 /* Anti and output dependencies have zero cost on all CPUs. */
20879 if (REG_NOTE_KIND (link) != 0)
20882 dep_insn_code_number = recog_memoized (dep_insn);
20884 /* If we can't recognize the insns, we can't really do anything. */
20885 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20888 insn_type = get_attr_type (insn);
20889 dep_insn_type = get_attr_type (dep_insn);
20893 case PROCESSOR_PENTIUM:
20894 /* Address Generation Interlock adds a cycle of latency. */
20895 if (insn_type == TYPE_LEA)
20897 rtx addr = PATTERN (insn);
20899 if (GET_CODE (addr) == PARALLEL)
20900 addr = XVECEXP (addr, 0, 0);
20902 gcc_assert (GET_CODE (addr) == SET);
20904 addr = SET_SRC (addr);
20905 if (modified_in_p (addr, dep_insn))
20908 else if (ix86_agi_dependent (dep_insn, insn))
20911 /* ??? Compares pair with jump/setcc. */
20912 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20915 /* Floating point stores require the value to be ready one cycle earlier. */
20916 if (insn_type == TYPE_FMOV
20917 && get_attr_memory (insn) == MEMORY_STORE
20918 && !ix86_agi_dependent (dep_insn, insn))
20922 case PROCESSOR_PENTIUMPRO:
20923 memory = get_attr_memory (insn);
20925 /* INT->FP conversion is expensive. */
20926 if (get_attr_fp_int_src (dep_insn))
20930 /* There is one extra cycle of latency between an FP op and a store. */
20930 if (insn_type == TYPE_FMOV
20931 && (set = single_set (dep_insn)) != NULL_RTX
20932 && (set2 = single_set (insn)) != NULL_RTX
20933 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20934 && MEM_P (SET_DEST (set2)))
20937 /* Show the ability of the reorder buffer to hide the latency of a load by
20938 executing it in parallel with the previous instruction when the
20939 previous instruction is not needed to compute the address. */
20940 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20941 && !ix86_agi_dependent (dep_insn, insn))
20943 /* Claim moves to take one cycle, as the core can issue one load
20944 at a time and the next load can start a cycle later. */
20945 if (dep_insn_type == TYPE_IMOV
20946 || dep_insn_type == TYPE_FMOV)
20954 memory = get_attr_memory (insn);
20956 /* The esp dependency is resolved before the instruction is really finished. */
20958 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20959 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20962 /* INT->FP conversion is expensive. */
20963 if (get_attr_fp_int_src (dep_insn))
20966 /* Show the ability of the reorder buffer to hide the latency of a load by
20967 executing it in parallel with the previous instruction when the
20968 previous instruction is not needed to compute the address. */
20969 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20970 && !ix86_agi_dependent (dep_insn, insn))
20972 /* Claim moves to take one cycle, as the core can issue one load
20973 at a time and the next load can start a cycle later. */
20974 if (dep_insn_type == TYPE_IMOV
20975 || dep_insn_type == TYPE_FMOV)
20984 case PROCESSOR_ATHLON:
20986 case PROCESSOR_AMDFAM10:
20987 case PROCESSOR_BDVER1:
20988 case PROCESSOR_ATOM:
20989 case PROCESSOR_GENERIC32:
20990 case PROCESSOR_GENERIC64:
20991 memory = get_attr_memory (insn);
20993 /* Show the ability of the reorder buffer to hide the latency of a load by
20994 executing it in parallel with the previous instruction when the
20995 previous instruction is not needed to compute the address. */
20996 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20997 && !ix86_agi_dependent (dep_insn, insn))
20999 enum attr_unit unit = get_attr_unit (insn);
21002 /* Because of the difference between the length of integer and
21003 floating unit pipeline preparation stages, the memory operands
21004 for floating point are cheaper.
21006 ??? For Athlon the difference is most probably 2. */
21007 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21010 loadcost = TARGET_ATHLON ? 2 : 0;
21012 if (cost >= loadcost)
21025 /* How many alternative schedules to try. This should be as wide as the
21026 scheduling freedom in the DFA, but no wider. Making this value too
21027 large results in extra work for the scheduler. */
21030 ia32_multipass_dfa_lookahead (void)
21034 case PROCESSOR_PENTIUM:
21037 case PROCESSOR_PENTIUMPRO:
21047 /* Compute the alignment given to a constant that is being placed in memory.
21048 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
21050 The value of this function is used instead of that alignment to align the object. */
21054 ix86_constant_alignment (tree exp, int align)
21056 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21057 || TREE_CODE (exp) == INTEGER_CST)
21059 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21061 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21064 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21065 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21066 return BITS_PER_WORD;
21071 /* Compute the alignment for a static variable.
21072 TYPE is the data type, and ALIGN is the alignment that
21073 the object would ordinarily have. The value of this function is used
21074 instead of that alignment to align the object. */
21077 ix86_data_alignment (tree type, int align)
21079 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21081 if (AGGREGATE_TYPE_P (type)
21082 && TYPE_SIZE (type)
21083 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21084 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21085 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21086 && align < max_align)
21089 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
21090 to a 16-byte boundary. */
21093 if (AGGREGATE_TYPE_P (type)
21094 && TYPE_SIZE (type)
21095 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21096 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21097 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21101 if (TREE_CODE (type) == ARRAY_TYPE)
21103 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21105 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21108 else if (TREE_CODE (type) == COMPLEX_TYPE)
21111 if (TYPE_MODE (type) == DCmode && align < 64)
21113 if ((TYPE_MODE (type) == XCmode
21114 || TYPE_MODE (type) == TCmode) && align < 128)
21117 else if ((TREE_CODE (type) == RECORD_TYPE
21118 || TREE_CODE (type) == UNION_TYPE
21119 || TREE_CODE (type) == QUAL_UNION_TYPE)
21120 && TYPE_FIELDS (type))
21122 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21124 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21127 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21128 || TREE_CODE (type) == INTEGER_TYPE)
21130 if (TYPE_MODE (type) == DFmode && align < 64)
21132 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21139 /* Compute the alignment for a local variable or a stack slot. EXP is
21140 the data type or decl itself, MODE is the widest mode available and
21141 ALIGN is the alignment that the object would ordinarily have. The
21142 value of this macro is used instead of that alignment to align the object. */
21146 ix86_local_alignment (tree exp, enum machine_mode mode,
21147 unsigned int align)
21151 if (exp && DECL_P (exp))
21153 type = TREE_TYPE (exp);
21162 /* Don't do dynamic stack realignment for long long objects with
21163 -mpreferred-stack-boundary=2. */
21166 && ix86_preferred_stack_boundary < 64
21167 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21168 && (!type || !TYPE_USER_ALIGN (type))
21169 && (!decl || !DECL_USER_ALIGN (decl)))
21172 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
21173 register in MODE. We will return the largest alignment of XF and DF. */
21177 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21178 align = GET_MODE_ALIGNMENT (DFmode);
21182 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
21183 to a 16-byte boundary. The exact wording is:
21185 An array uses the same alignment as its elements, except that a local or
21186 global array variable of length at least 16 bytes or
21187 a C99 variable-length array variable always has alignment of at least 16 bytes.
21189 This was added to allow use of aligned SSE instructions on arrays. The
21190 rule is meant for static storage (where the compiler cannot do the analysis
21191 by itself). We follow it for automatic variables only when convenient;
21192 we fully control everything in the function being compiled, and functions
21193 from other units cannot rely on the alignment.
21195 Exclude the va_list type. It is the common case of a local array where
21196 we cannot benefit from the alignment. */
21197 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21200 if (AGGREGATE_TYPE_P (type)
21201 && (TYPE_MAIN_VARIANT (type)
21202 != TYPE_MAIN_VARIANT (va_list_type_node))
21203 && TYPE_SIZE (type)
21204 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21205 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21206 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21209 if (TREE_CODE (type) == ARRAY_TYPE)
21211 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21213 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21216 else if (TREE_CODE (type) == COMPLEX_TYPE)
21218 if (TYPE_MODE (type) == DCmode && align < 64)
21220 if ((TYPE_MODE (type) == XCmode
21221 || TYPE_MODE (type) == TCmode) && align < 128)
21224 else if ((TREE_CODE (type) == RECORD_TYPE
21225 || TREE_CODE (type) == UNION_TYPE
21226 || TREE_CODE (type) == QUAL_UNION_TYPE)
21227 && TYPE_FIELDS (type))
21229 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21231 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21234 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21235 || TREE_CODE (type) == INTEGER_TYPE)
21238 if (TYPE_MODE (type) == DFmode && align < 64)
21240 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21246 /* Compute the minimum required alignment for dynamic stack realignment
21247 purposes for a local variable, parameter or a stack slot. EXP is
21248 the data type or decl itself, MODE is its mode and ALIGN is the
21249 alignment that the object would ordinarily have. */
21252 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21253 unsigned int align)
21257 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21260 if (exp && DECL_P (exp))
21262 type = TREE_TYPE (exp);
21271 /* Don't do dynamic stack realignment for long long objects with
21272 -mpreferred-stack-boundary=2. */
21273 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21274 && (!type || !TYPE_USER_ALIGN (type))
21275 && (!decl || !DECL_USER_ALIGN (decl)))
21281 /* Find a location for the static chain incoming to a nested function.
21282 This is a register, unless all free registers are used by arguments. */
21285 ix86_static_chain (const_tree fndecl, bool incoming_p)
21289 if (!DECL_STATIC_CHAIN (fndecl))
21294 /* We always use R10 in 64-bit mode. */
21300 /* By default in 32-bit mode we use ECX to pass the static chain. */
21303 fntype = TREE_TYPE (fndecl);
21304 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21306 /* Fastcall functions use ecx/edx for arguments, which leaves
21307 us with EAX for the static chain. */
21310 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21312 /* Thiscall functions use ecx for arguments, which leaves
21313 us with EAX for the static chain. */
21316 else if (ix86_function_regparm (fntype, fndecl) == 3)
21318 /* For regparm 3, we have no free call-clobbered registers in
21319 which to store the static chain. In order to implement this,
21320 we have the trampoline push the static chain to the stack.
21321 However, we can't push a value below the return address when
21322 we call the nested function directly, so we have to use an
21323 alternate entry point. For this we use ESI, and have the
21324 alternate entry point push ESI, so that things appear the
21325 same once we're executing the nested function. */
21328 if (fndecl == current_function_decl)
21329 ix86_static_chain_on_stack = true;
21330 return gen_frame_mem (SImode,
21331 plus_constant (arg_pointer_rtx, -8));
21337 return gen_rtx_REG (Pmode, regno);
21340 /* Emit RTL insns to initialize the variable parts of a trampoline.
21341 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21342 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21343 to be passed to the target function. */
21346 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21350 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21357 /* Depending on the static chain location, either load a register
21358 with a constant, or push the constant to the stack. All of the
21359 instructions are the same size. */
21360 chain = ix86_static_chain (fndecl, true);
21363 if (REGNO (chain) == CX_REG)
21365 else if (REGNO (chain) == AX_REG)
21368 gcc_unreachable ();
21373 mem = adjust_address (m_tramp, QImode, 0);
21374 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21376 mem = adjust_address (m_tramp, SImode, 1);
21377 emit_move_insn (mem, chain_value);
21379 /* Compute offset from the end of the jmp to the target function.
21380 In the case in which the trampoline stores the static chain on
21381 the stack, we need to skip the first insn which pushes the
21382 (call-saved) register static chain; this push is 1 byte. */
21383 disp = expand_binop (SImode, sub_optab, fnaddr,
21384 plus_constant (XEXP (m_tramp, 0),
21385 MEM_P (chain) ? 9 : 10),
21386 NULL_RTX, 1, OPTAB_DIRECT);
21388 mem = adjust_address (m_tramp, QImode, 5);
21389 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21391 mem = adjust_address (m_tramp, SImode, 6);
21392 emit_move_insn (mem, disp);
21398 /* Load the function address into r11. Try to load the address using
21399 the shorter movl instead of movabs. We may want to support
21400 movq for kernel mode, but the kernel does not use trampolines at the moment. */
21402 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21404 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21406 mem = adjust_address (m_tramp, HImode, offset);
21407 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21409 mem = adjust_address (m_tramp, SImode, offset + 2);
21410 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21415 mem = adjust_address (m_tramp, HImode, offset);
21416 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21418 mem = adjust_address (m_tramp, DImode, offset + 2);
21419 emit_move_insn (mem, fnaddr);
21423 /* Load static chain using movabs to r10. */
21424 mem = adjust_address (m_tramp, HImode, offset);
21425 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21427 mem = adjust_address (m_tramp, DImode, offset + 2);
21428 emit_move_insn (mem, chain_value);
21431 /* Jump to r11; the last (unused) byte is a nop, only there to
21432 pad the write out to a single 32-bit store. */
21433 mem = adjust_address (m_tramp, SImode, offset);
21434 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21437 gcc_assert (offset <= TRAMPOLINE_SIZE);
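/* Illustrative byte layout of the trampoline written above, for the
   common cases (register static chain; 64-bit shown with movabs):

     ia32, chain in %ecx:		x86-64:
	0: b9 <chain32>			 0: 49 bb <fnaddr64>  movabs $fn, %r11
	5: e9 <disp32>			10: 49 ba <chain64>   movabs $chain, %r10
					20: 49 ff e3 90       jmp *%r11; nop
   where <disp32> is the target address relative to the end of the
   jmp (trampoline + 10).  */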
21440 #ifdef ENABLE_EXECUTE_STACK
21441 #ifdef CHECK_EXECUTE_STACK_ENABLED
21442 if (CHECK_EXECUTE_STACK_ENABLED)
21444 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21445 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21449 /* The following file contains several enumerations and data structures
21450 built from the definitions in i386-builtin-types.def. */
21452 #include "i386-builtin-types.inc"
21454 /* Table for the ix86 builtin non-function types. */
21455 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21457 /* Retrieve an element from the above table, building some of
21458 the types lazily. */
21461 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21463 unsigned int index;
21466 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21468 type = ix86_builtin_type_tab[(int) tcode];
21472 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21473 if (tcode <= IX86_BT_LAST_VECT)
21475 enum machine_mode mode;
21477 index = tcode - IX86_BT_LAST_PRIM - 1;
21478 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21479 mode = ix86_builtin_type_vect_mode[index];
21481 type = build_vector_type_for_mode (itype, mode);
21487 index = tcode - IX86_BT_LAST_VECT - 1;
21488 if (tcode <= IX86_BT_LAST_PTR)
21489 quals = TYPE_UNQUALIFIED;
21491 quals = TYPE_QUAL_CONST;
21493 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21494 if (quals != TYPE_UNQUALIFIED)
21495 itype = build_qualified_type (itype, quals);
21497 type = build_pointer_type (itype);
21500 ix86_builtin_type_tab[(int) tcode] = type;
21504 /* Table for the ix86 builtin function types. */
21505 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21507 /* Retrieve an element from the above table, building some of
21508 the types lazily. */
21511 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21515 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21517 type = ix86_builtin_func_type_tab[(int) tcode];
21521 if (tcode <= IX86_BT_LAST_FUNC)
21523 unsigned start = ix86_builtin_func_start[(int) tcode];
21524 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21525 tree rtype, atype, args = void_list_node;
21528 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21529 for (i = after - 1; i > start; --i)
21531 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21532 args = tree_cons (NULL, atype, args);
21535 type = build_function_type (rtype, args);
21539 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21540 enum ix86_builtin_func_type icode;
21542 icode = ix86_builtin_func_alias_base[index];
21543 type = ix86_get_builtin_func_type (icode);
21546 ix86_builtin_func_type_tab[(int) tcode] = type;
21551 /* Codes for all the SSE/MMX builtins. */
21554 IX86_BUILTIN_ADDPS,
21555 IX86_BUILTIN_ADDSS,
21556 IX86_BUILTIN_DIVPS,
21557 IX86_BUILTIN_DIVSS,
21558 IX86_BUILTIN_MULPS,
21559 IX86_BUILTIN_MULSS,
21560 IX86_BUILTIN_SUBPS,
21561 IX86_BUILTIN_SUBSS,
21563 IX86_BUILTIN_CMPEQPS,
21564 IX86_BUILTIN_CMPLTPS,
21565 IX86_BUILTIN_CMPLEPS,
21566 IX86_BUILTIN_CMPGTPS,
21567 IX86_BUILTIN_CMPGEPS,
21568 IX86_BUILTIN_CMPNEQPS,
21569 IX86_BUILTIN_CMPNLTPS,
21570 IX86_BUILTIN_CMPNLEPS,
21571 IX86_BUILTIN_CMPNGTPS,
21572 IX86_BUILTIN_CMPNGEPS,
21573 IX86_BUILTIN_CMPORDPS,
21574 IX86_BUILTIN_CMPUNORDPS,
21575 IX86_BUILTIN_CMPEQSS,
21576 IX86_BUILTIN_CMPLTSS,
21577 IX86_BUILTIN_CMPLESS,
21578 IX86_BUILTIN_CMPNEQSS,
21579 IX86_BUILTIN_CMPNLTSS,
21580 IX86_BUILTIN_CMPNLESS,
21581 IX86_BUILTIN_CMPNGTSS,
21582 IX86_BUILTIN_CMPNGESS,
21583 IX86_BUILTIN_CMPORDSS,
21584 IX86_BUILTIN_CMPUNORDSS,
21586 IX86_BUILTIN_COMIEQSS,
21587 IX86_BUILTIN_COMILTSS,
21588 IX86_BUILTIN_COMILESS,
21589 IX86_BUILTIN_COMIGTSS,
21590 IX86_BUILTIN_COMIGESS,
21591 IX86_BUILTIN_COMINEQSS,
21592 IX86_BUILTIN_UCOMIEQSS,
21593 IX86_BUILTIN_UCOMILTSS,
21594 IX86_BUILTIN_UCOMILESS,
21595 IX86_BUILTIN_UCOMIGTSS,
21596 IX86_BUILTIN_UCOMIGESS,
21597 IX86_BUILTIN_UCOMINEQSS,
21599 IX86_BUILTIN_CVTPI2PS,
21600 IX86_BUILTIN_CVTPS2PI,
21601 IX86_BUILTIN_CVTSI2SS,
21602 IX86_BUILTIN_CVTSI642SS,
21603 IX86_BUILTIN_CVTSS2SI,
21604 IX86_BUILTIN_CVTSS2SI64,
21605 IX86_BUILTIN_CVTTPS2PI,
21606 IX86_BUILTIN_CVTTSS2SI,
21607 IX86_BUILTIN_CVTTSS2SI64,
21609 IX86_BUILTIN_MAXPS,
21610 IX86_BUILTIN_MAXSS,
21611 IX86_BUILTIN_MINPS,
21612 IX86_BUILTIN_MINSS,
21614 IX86_BUILTIN_LOADUPS,
21615 IX86_BUILTIN_STOREUPS,
21616 IX86_BUILTIN_MOVSS,
21618 IX86_BUILTIN_MOVHLPS,
21619 IX86_BUILTIN_MOVLHPS,
21620 IX86_BUILTIN_LOADHPS,
21621 IX86_BUILTIN_LOADLPS,
21622 IX86_BUILTIN_STOREHPS,
21623 IX86_BUILTIN_STORELPS,
21625 IX86_BUILTIN_MASKMOVQ,
21626 IX86_BUILTIN_MOVMSKPS,
21627 IX86_BUILTIN_PMOVMSKB,
21629 IX86_BUILTIN_MOVNTPS,
21630 IX86_BUILTIN_MOVNTQ,
21632 IX86_BUILTIN_LOADDQU,
21633 IX86_BUILTIN_STOREDQU,
21635 IX86_BUILTIN_PACKSSWB,
21636 IX86_BUILTIN_PACKSSDW,
21637 IX86_BUILTIN_PACKUSWB,
21639 IX86_BUILTIN_PADDB,
21640 IX86_BUILTIN_PADDW,
21641 IX86_BUILTIN_PADDD,
21642 IX86_BUILTIN_PADDQ,
21643 IX86_BUILTIN_PADDSB,
21644 IX86_BUILTIN_PADDSW,
21645 IX86_BUILTIN_PADDUSB,
21646 IX86_BUILTIN_PADDUSW,
21647 IX86_BUILTIN_PSUBB,
21648 IX86_BUILTIN_PSUBW,
21649 IX86_BUILTIN_PSUBD,
21650 IX86_BUILTIN_PSUBQ,
21651 IX86_BUILTIN_PSUBSB,
21652 IX86_BUILTIN_PSUBSW,
21653 IX86_BUILTIN_PSUBUSB,
21654 IX86_BUILTIN_PSUBUSW,
21657 IX86_BUILTIN_PANDN,
21661 IX86_BUILTIN_PAVGB,
21662 IX86_BUILTIN_PAVGW,
21664 IX86_BUILTIN_PCMPEQB,
21665 IX86_BUILTIN_PCMPEQW,
21666 IX86_BUILTIN_PCMPEQD,
21667 IX86_BUILTIN_PCMPGTB,
21668 IX86_BUILTIN_PCMPGTW,
21669 IX86_BUILTIN_PCMPGTD,
21671 IX86_BUILTIN_PMADDWD,
21673 IX86_BUILTIN_PMAXSW,
21674 IX86_BUILTIN_PMAXUB,
21675 IX86_BUILTIN_PMINSW,
21676 IX86_BUILTIN_PMINUB,
21678 IX86_BUILTIN_PMULHUW,
21679 IX86_BUILTIN_PMULHW,
21680 IX86_BUILTIN_PMULLW,
21682 IX86_BUILTIN_PSADBW,
21683 IX86_BUILTIN_PSHUFW,
21685 IX86_BUILTIN_PSLLW,
21686 IX86_BUILTIN_PSLLD,
21687 IX86_BUILTIN_PSLLQ,
21688 IX86_BUILTIN_PSRAW,
21689 IX86_BUILTIN_PSRAD,
21690 IX86_BUILTIN_PSRLW,
21691 IX86_BUILTIN_PSRLD,
21692 IX86_BUILTIN_PSRLQ,
21693 IX86_BUILTIN_PSLLWI,
21694 IX86_BUILTIN_PSLLDI,
21695 IX86_BUILTIN_PSLLQI,
21696 IX86_BUILTIN_PSRAWI,
21697 IX86_BUILTIN_PSRADI,
21698 IX86_BUILTIN_PSRLWI,
21699 IX86_BUILTIN_PSRLDI,
21700 IX86_BUILTIN_PSRLQI,
21702 IX86_BUILTIN_PUNPCKHBW,
21703 IX86_BUILTIN_PUNPCKHWD,
21704 IX86_BUILTIN_PUNPCKHDQ,
21705 IX86_BUILTIN_PUNPCKLBW,
21706 IX86_BUILTIN_PUNPCKLWD,
21707 IX86_BUILTIN_PUNPCKLDQ,
21709 IX86_BUILTIN_SHUFPS,
21711 IX86_BUILTIN_RCPPS,
21712 IX86_BUILTIN_RCPSS,
21713 IX86_BUILTIN_RSQRTPS,
21714 IX86_BUILTIN_RSQRTPS_NR,
21715 IX86_BUILTIN_RSQRTSS,
21716 IX86_BUILTIN_RSQRTF,
21717 IX86_BUILTIN_SQRTPS,
21718 IX86_BUILTIN_SQRTPS_NR,
21719 IX86_BUILTIN_SQRTSS,
21721 IX86_BUILTIN_UNPCKHPS,
21722 IX86_BUILTIN_UNPCKLPS,
21724 IX86_BUILTIN_ANDPS,
21725 IX86_BUILTIN_ANDNPS,
21727 IX86_BUILTIN_XORPS,
21730 IX86_BUILTIN_LDMXCSR,
21731 IX86_BUILTIN_STMXCSR,
21732 IX86_BUILTIN_SFENCE,
21734 /* 3DNow! Original */
21735 IX86_BUILTIN_FEMMS,
21736 IX86_BUILTIN_PAVGUSB,
21737 IX86_BUILTIN_PF2ID,
21738 IX86_BUILTIN_PFACC,
21739 IX86_BUILTIN_PFADD,
21740 IX86_BUILTIN_PFCMPEQ,
21741 IX86_BUILTIN_PFCMPGE,
21742 IX86_BUILTIN_PFCMPGT,
21743 IX86_BUILTIN_PFMAX,
21744 IX86_BUILTIN_PFMIN,
21745 IX86_BUILTIN_PFMUL,
21746 IX86_BUILTIN_PFRCP,
21747 IX86_BUILTIN_PFRCPIT1,
21748 IX86_BUILTIN_PFRCPIT2,
21749 IX86_BUILTIN_PFRSQIT1,
21750 IX86_BUILTIN_PFRSQRT,
21751 IX86_BUILTIN_PFSUB,
21752 IX86_BUILTIN_PFSUBR,
21753 IX86_BUILTIN_PI2FD,
21754 IX86_BUILTIN_PMULHRW,
21756 /* 3DNow! Athlon Extensions */
21757 IX86_BUILTIN_PF2IW,
21758 IX86_BUILTIN_PFNACC,
21759 IX86_BUILTIN_PFPNACC,
21760 IX86_BUILTIN_PI2FW,
21761 IX86_BUILTIN_PSWAPDSI,
21762 IX86_BUILTIN_PSWAPDSF,
21765 IX86_BUILTIN_ADDPD,
21766 IX86_BUILTIN_ADDSD,
21767 IX86_BUILTIN_DIVPD,
21768 IX86_BUILTIN_DIVSD,
21769 IX86_BUILTIN_MULPD,
21770 IX86_BUILTIN_MULSD,
21771 IX86_BUILTIN_SUBPD,
21772 IX86_BUILTIN_SUBSD,
21774 IX86_BUILTIN_CMPEQPD,
21775 IX86_BUILTIN_CMPLTPD,
21776 IX86_BUILTIN_CMPLEPD,
21777 IX86_BUILTIN_CMPGTPD,
21778 IX86_BUILTIN_CMPGEPD,
21779 IX86_BUILTIN_CMPNEQPD,
21780 IX86_BUILTIN_CMPNLTPD,
21781 IX86_BUILTIN_CMPNLEPD,
21782 IX86_BUILTIN_CMPNGTPD,
21783 IX86_BUILTIN_CMPNGEPD,
21784 IX86_BUILTIN_CMPORDPD,
21785 IX86_BUILTIN_CMPUNORDPD,
21786 IX86_BUILTIN_CMPEQSD,
21787 IX86_BUILTIN_CMPLTSD,
21788 IX86_BUILTIN_CMPLESD,
21789 IX86_BUILTIN_CMPNEQSD,
21790 IX86_BUILTIN_CMPNLTSD,
21791 IX86_BUILTIN_CMPNLESD,
21792 IX86_BUILTIN_CMPORDSD,
21793 IX86_BUILTIN_CMPUNORDSD,
21795 IX86_BUILTIN_COMIEQSD,
21796 IX86_BUILTIN_COMILTSD,
21797 IX86_BUILTIN_COMILESD,
21798 IX86_BUILTIN_COMIGTSD,
21799 IX86_BUILTIN_COMIGESD,
21800 IX86_BUILTIN_COMINEQSD,
21801 IX86_BUILTIN_UCOMIEQSD,
21802 IX86_BUILTIN_UCOMILTSD,
21803 IX86_BUILTIN_UCOMILESD,
21804 IX86_BUILTIN_UCOMIGTSD,
21805 IX86_BUILTIN_UCOMIGESD,
21806 IX86_BUILTIN_UCOMINEQSD,
21808 IX86_BUILTIN_MAXPD,
21809 IX86_BUILTIN_MAXSD,
21810 IX86_BUILTIN_MINPD,
21811 IX86_BUILTIN_MINSD,
21813 IX86_BUILTIN_ANDPD,
21814 IX86_BUILTIN_ANDNPD,
21816 IX86_BUILTIN_XORPD,
21818 IX86_BUILTIN_SQRTPD,
21819 IX86_BUILTIN_SQRTSD,
21821 IX86_BUILTIN_UNPCKHPD,
21822 IX86_BUILTIN_UNPCKLPD,
21824 IX86_BUILTIN_SHUFPD,
21826 IX86_BUILTIN_LOADUPD,
21827 IX86_BUILTIN_STOREUPD,
21828 IX86_BUILTIN_MOVSD,
21830 IX86_BUILTIN_LOADHPD,
21831 IX86_BUILTIN_LOADLPD,
21833 IX86_BUILTIN_CVTDQ2PD,
21834 IX86_BUILTIN_CVTDQ2PS,
21836 IX86_BUILTIN_CVTPD2DQ,
21837 IX86_BUILTIN_CVTPD2PI,
21838 IX86_BUILTIN_CVTPD2PS,
21839 IX86_BUILTIN_CVTTPD2DQ,
21840 IX86_BUILTIN_CVTTPD2PI,
21842 IX86_BUILTIN_CVTPI2PD,
21843 IX86_BUILTIN_CVTSI2SD,
21844 IX86_BUILTIN_CVTSI642SD,
21846 IX86_BUILTIN_CVTSD2SI,
21847 IX86_BUILTIN_CVTSD2SI64,
21848 IX86_BUILTIN_CVTSD2SS,
21849 IX86_BUILTIN_CVTSS2SD,
21850 IX86_BUILTIN_CVTTSD2SI,
21851 IX86_BUILTIN_CVTTSD2SI64,
21853 IX86_BUILTIN_CVTPS2DQ,
21854 IX86_BUILTIN_CVTPS2PD,
21855 IX86_BUILTIN_CVTTPS2DQ,
21857 IX86_BUILTIN_MOVNTI,
21858 IX86_BUILTIN_MOVNTPD,
21859 IX86_BUILTIN_MOVNTDQ,
21861 IX86_BUILTIN_MOVQ128,
21864 IX86_BUILTIN_MASKMOVDQU,
21865 IX86_BUILTIN_MOVMSKPD,
21866 IX86_BUILTIN_PMOVMSKB128,
21868 IX86_BUILTIN_PACKSSWB128,
21869 IX86_BUILTIN_PACKSSDW128,
21870 IX86_BUILTIN_PACKUSWB128,
21872 IX86_BUILTIN_PADDB128,
21873 IX86_BUILTIN_PADDW128,
21874 IX86_BUILTIN_PADDD128,
21875 IX86_BUILTIN_PADDQ128,
21876 IX86_BUILTIN_PADDSB128,
21877 IX86_BUILTIN_PADDSW128,
21878 IX86_BUILTIN_PADDUSB128,
21879 IX86_BUILTIN_PADDUSW128,
21880 IX86_BUILTIN_PSUBB128,
21881 IX86_BUILTIN_PSUBW128,
21882 IX86_BUILTIN_PSUBD128,
21883 IX86_BUILTIN_PSUBQ128,
21884 IX86_BUILTIN_PSUBSB128,
21885 IX86_BUILTIN_PSUBSW128,
21886 IX86_BUILTIN_PSUBUSB128,
21887 IX86_BUILTIN_PSUBUSW128,
21889 IX86_BUILTIN_PAND128,
21890 IX86_BUILTIN_PANDN128,
21891 IX86_BUILTIN_POR128,
21892 IX86_BUILTIN_PXOR128,
21894 IX86_BUILTIN_PAVGB128,
21895 IX86_BUILTIN_PAVGW128,
21897 IX86_BUILTIN_PCMPEQB128,
21898 IX86_BUILTIN_PCMPEQW128,
21899 IX86_BUILTIN_PCMPEQD128,
21900 IX86_BUILTIN_PCMPGTB128,
21901 IX86_BUILTIN_PCMPGTW128,
21902 IX86_BUILTIN_PCMPGTD128,
21904 IX86_BUILTIN_PMADDWD128,
21906 IX86_BUILTIN_PMAXSW128,
21907 IX86_BUILTIN_PMAXUB128,
21908 IX86_BUILTIN_PMINSW128,
21909 IX86_BUILTIN_PMINUB128,
21911 IX86_BUILTIN_PMULUDQ,
21912 IX86_BUILTIN_PMULUDQ128,
21913 IX86_BUILTIN_PMULHUW128,
21914 IX86_BUILTIN_PMULHW128,
21915 IX86_BUILTIN_PMULLW128,
21917 IX86_BUILTIN_PSADBW128,
21918 IX86_BUILTIN_PSHUFHW,
21919 IX86_BUILTIN_PSHUFLW,
21920 IX86_BUILTIN_PSHUFD,
21922 IX86_BUILTIN_PSLLDQI128,
21923 IX86_BUILTIN_PSLLWI128,
21924 IX86_BUILTIN_PSLLDI128,
21925 IX86_BUILTIN_PSLLQI128,
21926 IX86_BUILTIN_PSRAWI128,
21927 IX86_BUILTIN_PSRADI128,
21928 IX86_BUILTIN_PSRLDQI128,
21929 IX86_BUILTIN_PSRLWI128,
21930 IX86_BUILTIN_PSRLDI128,
21931 IX86_BUILTIN_PSRLQI128,
21933 IX86_BUILTIN_PSLLDQ128,
21934 IX86_BUILTIN_PSLLW128,
21935 IX86_BUILTIN_PSLLD128,
21936 IX86_BUILTIN_PSLLQ128,
21937 IX86_BUILTIN_PSRAW128,
21938 IX86_BUILTIN_PSRAD128,
21939 IX86_BUILTIN_PSRLW128,
21940 IX86_BUILTIN_PSRLD128,
21941 IX86_BUILTIN_PSRLQ128,
21943 IX86_BUILTIN_PUNPCKHBW128,
21944 IX86_BUILTIN_PUNPCKHWD128,
21945 IX86_BUILTIN_PUNPCKHDQ128,
21946 IX86_BUILTIN_PUNPCKHQDQ128,
21947 IX86_BUILTIN_PUNPCKLBW128,
21948 IX86_BUILTIN_PUNPCKLWD128,
21949 IX86_BUILTIN_PUNPCKLDQ128,
21950 IX86_BUILTIN_PUNPCKLQDQ128,
21952 IX86_BUILTIN_CLFLUSH,
21953 IX86_BUILTIN_MFENCE,
21954 IX86_BUILTIN_LFENCE,
21956 IX86_BUILTIN_BSRSI,
21957 IX86_BUILTIN_BSRDI,
21958 IX86_BUILTIN_RDPMC,
21959 IX86_BUILTIN_RDTSC,
21960 IX86_BUILTIN_RDTSCP,
21961 IX86_BUILTIN_ROLQI,
21962 IX86_BUILTIN_ROLHI,
21963 IX86_BUILTIN_RORQI,
21964 IX86_BUILTIN_RORHI,
21967 IX86_BUILTIN_ADDSUBPS,
21968 IX86_BUILTIN_HADDPS,
21969 IX86_BUILTIN_HSUBPS,
21970 IX86_BUILTIN_MOVSHDUP,
21971 IX86_BUILTIN_MOVSLDUP,
21972 IX86_BUILTIN_ADDSUBPD,
21973 IX86_BUILTIN_HADDPD,
21974 IX86_BUILTIN_HSUBPD,
21975 IX86_BUILTIN_LDDQU,
21977 IX86_BUILTIN_MONITOR,
21978 IX86_BUILTIN_MWAIT,
21981 IX86_BUILTIN_PHADDW,
21982 IX86_BUILTIN_PHADDD,
21983 IX86_BUILTIN_PHADDSW,
21984 IX86_BUILTIN_PHSUBW,
21985 IX86_BUILTIN_PHSUBD,
21986 IX86_BUILTIN_PHSUBSW,
21987 IX86_BUILTIN_PMADDUBSW,
21988 IX86_BUILTIN_PMULHRSW,
21989 IX86_BUILTIN_PSHUFB,
21990 IX86_BUILTIN_PSIGNB,
21991 IX86_BUILTIN_PSIGNW,
21992 IX86_BUILTIN_PSIGND,
21993 IX86_BUILTIN_PALIGNR,
21994 IX86_BUILTIN_PABSB,
21995 IX86_BUILTIN_PABSW,
21996 IX86_BUILTIN_PABSD,
21998 IX86_BUILTIN_PHADDW128,
21999 IX86_BUILTIN_PHADDD128,
22000 IX86_BUILTIN_PHADDSW128,
22001 IX86_BUILTIN_PHSUBW128,
22002 IX86_BUILTIN_PHSUBD128,
22003 IX86_BUILTIN_PHSUBSW128,
22004 IX86_BUILTIN_PMADDUBSW128,
22005 IX86_BUILTIN_PMULHRSW128,
22006 IX86_BUILTIN_PSHUFB128,
22007 IX86_BUILTIN_PSIGNB128,
22008 IX86_BUILTIN_PSIGNW128,
22009 IX86_BUILTIN_PSIGND128,
22010 IX86_BUILTIN_PALIGNR128,
22011 IX86_BUILTIN_PABSB128,
22012 IX86_BUILTIN_PABSW128,
22013 IX86_BUILTIN_PABSD128,
22015 /* AMDFAM10 - SSE4A New Instructions. */
22016 IX86_BUILTIN_MOVNTSD,
22017 IX86_BUILTIN_MOVNTSS,
22018 IX86_BUILTIN_EXTRQI,
22019 IX86_BUILTIN_EXTRQ,
22020 IX86_BUILTIN_INSERTQI,
22021 IX86_BUILTIN_INSERTQ,
22024 IX86_BUILTIN_BLENDPD,
22025 IX86_BUILTIN_BLENDPS,
22026 IX86_BUILTIN_BLENDVPD,
22027 IX86_BUILTIN_BLENDVPS,
22028 IX86_BUILTIN_PBLENDVB128,
22029 IX86_BUILTIN_PBLENDW128,
22034 IX86_BUILTIN_INSERTPS128,
22036 IX86_BUILTIN_MOVNTDQA,
22037 IX86_BUILTIN_MPSADBW128,
22038 IX86_BUILTIN_PACKUSDW128,
22039 IX86_BUILTIN_PCMPEQQ,
22040 IX86_BUILTIN_PHMINPOSUW128,
22042 IX86_BUILTIN_PMAXSB128,
22043 IX86_BUILTIN_PMAXSD128,
22044 IX86_BUILTIN_PMAXUD128,
22045 IX86_BUILTIN_PMAXUW128,
22047 IX86_BUILTIN_PMINSB128,
22048 IX86_BUILTIN_PMINSD128,
22049 IX86_BUILTIN_PMINUD128,
22050 IX86_BUILTIN_PMINUW128,
22052 IX86_BUILTIN_PMOVSXBW128,
22053 IX86_BUILTIN_PMOVSXBD128,
22054 IX86_BUILTIN_PMOVSXBQ128,
22055 IX86_BUILTIN_PMOVSXWD128,
22056 IX86_BUILTIN_PMOVSXWQ128,
22057 IX86_BUILTIN_PMOVSXDQ128,
22059 IX86_BUILTIN_PMOVZXBW128,
22060 IX86_BUILTIN_PMOVZXBD128,
22061 IX86_BUILTIN_PMOVZXBQ128,
22062 IX86_BUILTIN_PMOVZXWD128,
22063 IX86_BUILTIN_PMOVZXWQ128,
22064 IX86_BUILTIN_PMOVZXDQ128,
22066 IX86_BUILTIN_PMULDQ128,
22067 IX86_BUILTIN_PMULLD128,
22069 IX86_BUILTIN_ROUNDPD,
22070 IX86_BUILTIN_ROUNDPS,
22071 IX86_BUILTIN_ROUNDSD,
22072 IX86_BUILTIN_ROUNDSS,
22074 IX86_BUILTIN_PTESTZ,
22075 IX86_BUILTIN_PTESTC,
22076 IX86_BUILTIN_PTESTNZC,
22078 IX86_BUILTIN_VEC_INIT_V2SI,
22079 IX86_BUILTIN_VEC_INIT_V4HI,
22080 IX86_BUILTIN_VEC_INIT_V8QI,
22081 IX86_BUILTIN_VEC_EXT_V2DF,
22082 IX86_BUILTIN_VEC_EXT_V2DI,
22083 IX86_BUILTIN_VEC_EXT_V4SF,
22084 IX86_BUILTIN_VEC_EXT_V4SI,
22085 IX86_BUILTIN_VEC_EXT_V8HI,
22086 IX86_BUILTIN_VEC_EXT_V2SI,
22087 IX86_BUILTIN_VEC_EXT_V4HI,
22088 IX86_BUILTIN_VEC_EXT_V16QI,
22089 IX86_BUILTIN_VEC_SET_V2DI,
22090 IX86_BUILTIN_VEC_SET_V4SF,
22091 IX86_BUILTIN_VEC_SET_V4SI,
22092 IX86_BUILTIN_VEC_SET_V8HI,
22093 IX86_BUILTIN_VEC_SET_V4HI,
22094 IX86_BUILTIN_VEC_SET_V16QI,
22096 IX86_BUILTIN_VEC_PACK_SFIX,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins. */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins. */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions. */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions. */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* FSGSBASE instructions. */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions. */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions. */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls. */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA. */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;                           /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;          /* builtin recorded but decl not yet built */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array. Stores the
   function decl in the ix86_builtins array. Returns the function decl, or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization. Doing so can save about 300K for the
   default compiler. When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even
   those that aren't in the current ISA, in case the user uses function
   specific options for a different ISA, so that we don't get scope errors
   if a builtin is added in the middle of a function scope. */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}

/* Like def_builtin, but also marks the function decl "const". */

static inline tree
def_builtin_const (int mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
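
/* Illustrative sketch, not part of the original file: a typical call site
   for def_builtin_const during builtin initialization. The builtin name,
   function type and code below are taken from real table entries elsewhere
   in this file; the bare call itself is hypothetical. */
#if 0
  /* Declare __builtin_ia32_addps for SSE targets and mark it "const".  */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
#endif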

/* Add any new builtin functions for a given ISA that may not have been
   declared. This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them. */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again. */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
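
/* Sketch, not part of the original file: the other half of the deferral
   scheme. def_builtin records out-of-ISA builtins in ix86_builtins_isa;
   when the ISA is later widened (e.g. by function specific options), a
   call such as the hypothetical one below declares the deferred decls. */
#if 0
  ix86_add_new_builtins (OPTION_MASK_ISA_AVX);
#endif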

/* Bits for builtin_description.flag. */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it. */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
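
/* Sketch, not part of the original file: the shape of one table entry.
   The row below mirrors the first bdesc_comi row that follows; expander
   loops key off .mask to gate on the ISA, .icode to pick the insn
   pattern, .comparison for the condition, and .flag for
   BUILTIN_DESC_SWAP_OPERANDS. */
#if 0
static const struct builtin_description example_row =
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
    IX86_BUILTIN_COMIEQSS, UNEQ, 0 };
#endif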

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
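
/* Sketch, not part of the original file: what a bdesc_comi entry looks
   like from user code. The builtin name is real (first row above); the
   vector typedef and variables are hypothetical. The UNEQ comparison
   reflects that COMISS also reports "equal" for unordered operands. */
#if 0
  typedef float v4sf __attribute__ ((vector_size (16)));
  v4sf a, b;
  int eq = __builtin_ia32_comieq (a, b);
#endif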

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments. */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
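
/* Sketch, not part of the original file: the last field of each row above
   is the prototype tag, so the rdtsc row (UINT64_FTYPE_VOID) yields a
   builtin taking no arguments and returning the 64-bit time stamp
   counter. */
#if 0
  unsigned long long tsc = __builtin_ia32_rdtsc ();
#endif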

/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22994 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22995 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22996 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22997 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22998 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22999 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
23000 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
23001 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
23002 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
23003 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
23004 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
23005 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
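
  /* The next two entries have a null name: entries whose name field is 0
     are not registered from this table directly; they are presumably given
     their user-visible names (__builtin_fabsq / __builtin_copysignq) by
     hand elsewhere in this file.  */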
  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  /* SSE4.1 */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
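
  /* In the ptest entries above the comparison field is EQ, LTU or GTU
     rather than UNKNOWN; when such a builtin is expanded, that code
     appears to select which condition of the PTEST flags result (ZF set,
     CF set, or both clear) becomes the integer return value.  */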

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
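
/* A rough sketch of how a table like the one above is consumed at
   builtin-initialization time: each named entry is registered under its
   ISA mask, with the function type taken from the flag field.  This is
   an illustration only; the actual registration loop lives elsewhere in
   this file and may differ in detail.

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
         if (d->name == 0)
           continue;
         ftype = (enum ix86_builtin_func_type) d->flag;
         def_builtin_const (d->mask, d->name, ftype, d->code);
       }
*/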
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
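
/* Each MULTI_ARG_* macro above is just a shorthand for one of the
   ix86_builtin_func_type codes.  MULTI_ARG_3_SF, for instance, is
   V4SF_FTYPE_V4SF_V4SF_V4SF, so a table entry using it (such as
   __builtin_ia32_vfmaddps below) describes a builtin that takes three
   4-float vectors and returns one, roughly:

     __m128 __builtin_ia32_vfmaddps (__m128, __m128, __m128);
*/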

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
23562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
23563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
23564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
23566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
23567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23672 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add all FMA4/XOP multi-arg builtin instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
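/* Illustrative sketch (editorial addition): because the loops above register
   builtins even for ISAs missing from the command line, per-function target
   overrides still work.  For example, in a unit compiled without -msse4.2
   the following is accepted, assuming the usual crc32 builtin signature:

     __attribute__ ((target ("sse4.2")))
     unsigned int
     crc_step (unsigned int c, unsigned char v)
     {
       return __builtin_ia32_crc32qi (c, v);
     }
*/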
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
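/* Illustrative sketch (editorial addition): on 64-bit targets the builtins
   registered above allow one translation unit to service both ABIs, e.g.:

     __attribute__ ((ms_abi))
     int
     sum_ms (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/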
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
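/* Illustrative sketch (editorial addition): once registered, both types are
   usable from C independently of what long double happens to be, using the
   i386 literal suffixes:

     __float80  x = 1.0w;
     __float128 y = 1.0q;
*/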
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to a normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}
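/* Illustrative sketch (editorial addition): the TFmode builtins above need
   no ISA flags; without SSE2, fabsq/copysignq simply become libcalls to the
   libgcc routines named in the registration (__fabstf2, __copysigntf3):

     __float128
     magnitude_with_sign_of (__float128 a, __float128 b)
     {
       return __builtin_copysignq (__builtin_fabsq (a), b);
     }
*/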
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
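/* Illustrative sketch (editorial addition): the simplest users of this binop
   path are the two-operand vector builtins, e.g. with -msse2:

     typedef short __v8hi __attribute__ ((__vector_size__ (16)));

     __v8hi
     add16 (__v8hi a, __v8hi b)
     {
       return __builtin_ia32_paddw128 (a, b);
     }
*/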
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
        {
          if (!CONST_INT_P (op))
            {
              error ("last argument must be an immediate");
              return gen_reg_rtx (tmode);
            }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
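/* Illustrative sketch (editorial addition): the last_arg_constant check
   above is what enforces the literal rotate count of the *_IMM descriptor
   rows, e.g. for "__builtin_ia32_vprotqi" from the table:

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     __v2di
     rot3 (__v2di x)
     {
       return __builtin_ia32_vprotqi (x, 3);
     }

   Passing a variable count instead of the literal 3 draws the "last
   argument must be an immediate" error emitted above.  */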
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
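/* Illustrative sketch (editorial addition): a typical comparison builtin
   routed through here, with -msse:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     lt_mask (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_cmpltps (a, b);
     }

   The swap path above lets orderings without a direct hardware pattern
   reuse the same insn with the operands exchanged.  */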
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
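/* Illustrative sketch (editorial addition): the comi expander materializes
   the flags result through a setcc into the low byte of an SImode pseudo,
   so, with -msse:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     int
     scalar_lt (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_comilt (a, b);
     }

   compiles to comiss plus a setcc into the zeroed result register.  */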
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
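/* Illustrative sketch (editorial addition): the ptest expander follows the
   same flags-to-QImode pattern, e.g. with -msse4.1:

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     int
     test_zero (__v2di a, __v2di b)
     {
       return __builtin_ia32_ptestz128 (a, b);
     }
*/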
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
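/* Illustrative sketch (editorial addition): the explicit-length string
   compare exercises all three result shapes handled above; the index form
   is the common one:

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     int
     find (__v16qi a, int la, __v16qi b, int lb)
     {
       return __builtin_ia32_pcmpestri128 (a, la, b, lb, 0);
     }

   The fifth argument must be a literal, per the immediate check above.  The
   pcmpistr[im] expander that follows is the implicit-length analogue.  */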
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
        return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = V1DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || !target
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or a
             register as count.  But builtin functions take int as
             count.  If count doesn't match, we put it in a register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((nargs - i) <= nargs_constant)
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:
              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_cmpsdv2df3:
              case CODE_FOR_avx_cmpssv4sf3:
              case CODE_FOR_avx_cmppdv2df3:
              case CODE_FOR_avx_cmppsv4sf3:
              case CODE_FOR_avx_cmppdv4df3:
              case CODE_FOR_avx_cmppsv8sf3:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
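/* Illustrative sketch (editorial addition): the last_arg_count handling
   above is why the SIMD shift builtins accept a plain int count, e.g. with
   -msse2:

     typedef short __v8hi __attribute__ ((__vector_size__ (16)));

     __v8hi
     shl (__v8hi v, int n)
     {
       return __builtin_ia32_psllwi128 (v, n);
     }

   A constant n becomes an immediate-count shift; otherwise the count is
   copied into a register first.  */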
25008 /* Subroutine of ix86_expand_builtin to take care of special insns
25009 with variable number of operands. */
25012 ix86_expand_special_args_builtin (const struct builtin_description *d,
25013 tree exp, rtx target)
25017 unsigned int i, nargs, arg_adjust, memory;
25021 enum machine_mode mode;
25023 enum insn_code icode = d->icode;
25024 bool last_arg_constant = false;
25025 const struct insn_data_d *insn_p = &insn_data[icode];
25026 enum machine_mode tmode = insn_p->operand[0].mode;
25027 enum { load, store } klass;
25029 switch ((enum ix86_builtin_func_type) d->flag)
25031 case VOID_FTYPE_VOID:
25032 emit_insn (GEN_FCN (icode) (target));
25034 case VOID_FTYPE_UINT64:
25035 case VOID_FTYPE_UNSIGNED:
25041 case UINT64_FTYPE_VOID:
25042 case UNSIGNED_FTYPE_VOID:
25043 case UINT16_FTYPE_VOID:
25048 case UINT64_FTYPE_PUNSIGNED:
25049 case V2DI_FTYPE_PV2DI:
25050 case V32QI_FTYPE_PCCHAR:
25051 case V16QI_FTYPE_PCCHAR:
25052 case V8SF_FTYPE_PCV4SF:
25053 case V8SF_FTYPE_PCFLOAT:
25054 case V4SF_FTYPE_PCFLOAT:
25055 case V4DF_FTYPE_PCV2DF:
25056 case V4DF_FTYPE_PCDOUBLE:
25057 case V2DF_FTYPE_PCDOUBLE:
25058 case VOID_FTYPE_PVOID:
25063 case VOID_FTYPE_PV2SF_V4SF:
25064 case VOID_FTYPE_PV4DI_V4DI:
25065 case VOID_FTYPE_PV2DI_V2DI:
25066 case VOID_FTYPE_PCHAR_V32QI:
25067 case VOID_FTYPE_PCHAR_V16QI:
25068 case VOID_FTYPE_PFLOAT_V8SF:
25069 case VOID_FTYPE_PFLOAT_V4SF:
25070 case VOID_FTYPE_PDOUBLE_V4DF:
25071 case VOID_FTYPE_PDOUBLE_V2DF:
25072 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25073 case VOID_FTYPE_PINT_INT:
25076 /* Reserve memory operand for target. */
25077 memory = ARRAY_SIZE (args);
25079 case V4SF_FTYPE_V4SF_PCV2SF:
25080 case V2DF_FTYPE_V2DF_PCDOUBLE:
25085 case V8SF_FTYPE_PCV8SF_V8SF:
25086 case V4DF_FTYPE_PCV4DF_V4DF:
25087 case V4SF_FTYPE_PCV4SF_V4SF:
25088 case V2DF_FTYPE_PCV2DF_V2DF:
25093 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25094 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25095 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25096 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25099 /* Reserve memory operand for target. */
25100 memory = ARRAY_SIZE (args);
25102 case VOID_FTYPE_UINT_UINT_UINT:
25103 case VOID_FTYPE_UINT64_UINT_UINT:
25104 case UCHAR_FTYPE_UINT_UINT_UINT:
25105 case UCHAR_FTYPE_UINT64_UINT_UINT:
25108 memory = ARRAY_SIZE (args);
25109 last_arg_constant = true;
25112 gcc_unreachable ();
25115 gcc_assert (nargs <= ARRAY_SIZE (args));
25117 if (klass == store)
25119 arg = CALL_EXPR_ARG (exp, 0);
25120 op = expand_normal (arg);
25121 gcc_assert (target == 0);
25123 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25125 target = force_reg (tmode, op);
25133 || GET_MODE (target) != tmode
25134 || !insn_p->operand[0].predicate (target, tmode))
25135 target = gen_reg_rtx (tmode);
25138 for (i = 0; i < nargs; i++)
25140 enum machine_mode mode = insn_p->operand[i + 1].mode;
25143 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25144 op = expand_normal (arg);
25145 match = insn_p->operand[i + 1].predicate (op, mode);
25147 if (last_arg_constant && (i + 1) == nargs)
25151 if (icode == CODE_FOR_lwp_lwpvalsi3
25152 || icode == CODE_FOR_lwp_lwpinssi3
25153 || icode == CODE_FOR_lwp_lwpvaldi3
25154 || icode == CODE_FOR_lwp_lwpinsdi3)
25155 error ("the last argument must be a 32-bit immediate");
25157 error ("the last argument must be an 8-bit immediate");
25165 /* This must be the memory operand. */
25166 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25167 gcc_assert (GET_MODE (op) == mode
25168 || GET_MODE (op) == VOIDmode);
25172 /* This must be a register. */
25173 if (VECTOR_MODE_P (mode))
25174 op = safe_vector_operand (op, mode);
25176 gcc_assert (GET_MODE (op) == mode
25177 || GET_MODE (op) == VOIDmode);
25178 op = copy_to_mode_reg (mode, op);
25183 args[i].mode = mode;
25189 pat = GEN_FCN (icode) (target);
25192 pat = GEN_FCN (icode) (target, args[0].op);
25195 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25198 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25201 gcc_unreachable ();
25207 return klass == store ? 0 : target;
25210 /* Return the integer constant in ARG. Constrain it to be in the range
25211 of the subparts of VEC_TYPE; issue an error if not. */
25214 get_element_number (tree vec_type, tree arg)
25216 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25218 if (!host_integerp (arg, 1)
25219 || (elt = tree_low_cst (arg, 1), elt > max))
25221 error ("selector must be an integer constant in the range 0..%wi", max);
25228 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25229 ix86_expand_vector_init. We DO have language-level syntax for this, in
25230 the form of (type){ init-list }. Except that since we can't place emms
25231 instructions from inside the compiler, we can't allow the use of MMX
25232 registers unless the user explicitly asks for it. So we do *not* define
25233 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25234 we have builtins invoked by mmintrin.h that give us license to emit
25235 these sorts of instructions. */
25238 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25240 enum machine_mode tmode = TYPE_MODE (type);
25241 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25242 int i, n_elt = GET_MODE_NUNITS (tmode);
25243 rtvec v = rtvec_alloc (n_elt);
25245 gcc_assert (VECTOR_MODE_P (tmode));
25246 gcc_assert (call_expr_nargs (exp) == n_elt);
25248 for (i = 0; i < n_elt; ++i)
25250 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25251 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25254 if (!target || !register_operand (target, tmode))
25255 target = gen_reg_rtx (tmode);
25257 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
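/* A user-level sketch (illustrative, not from the original source) of
   how these builtins are reached: mmintrin.h wraps them, e.g.

       extern __inline __m64
       _mm_set_pi32 (int __i1, int __i0)
       {
	 return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
       }

   so the element list arrives here as the call's argument list.  */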
25261 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25262 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25263 had a language-level syntax for referencing vector elements. */
25266 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25268 enum machine_mode tmode, mode0;
25273 arg0 = CALL_EXPR_ARG (exp, 0);
25274 arg1 = CALL_EXPR_ARG (exp, 1);
25276 op0 = expand_normal (arg0);
25277 elt = get_element_number (TREE_TYPE (arg0), arg1);
25279 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25280 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25281 gcc_assert (VECTOR_MODE_P (mode0));
25283 op0 = force_reg (mode0, op0);
25285 if (optimize || !target || !register_operand (target, tmode))
25286 target = gen_reg_rtx (tmode);
25288 ix86_expand_vector_extract (true, target, op0, elt);
25293 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25294 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25295 a language-level syntax for referencing vector elements. */
25298 ix86_expand_vec_set_builtin (tree exp)
25300 enum machine_mode tmode, mode1;
25301 tree arg0, arg1, arg2;
25303 rtx op0, op1, target;
25305 arg0 = CALL_EXPR_ARG (exp, 0);
25306 arg1 = CALL_EXPR_ARG (exp, 1);
25307 arg2 = CALL_EXPR_ARG (exp, 2);
25309 tmode = TYPE_MODE (TREE_TYPE (arg0));
25310 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25311 gcc_assert (VECTOR_MODE_P (tmode));
25313 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25314 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25315 elt = get_element_number (TREE_TYPE (arg0), arg2);
25317 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25318 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25320 op0 = force_reg (tmode, op0);
25321 op1 = force_reg (mode1, op1);
25323 /* OP0 is the source of these builtin functions and shouldn't be
25324 modified. Create a copy, use it and return it as the target. */
25325 target = gen_reg_rtx (tmode);
25326 emit_move_insn (target, op0);
25327 ix86_expand_vector_set (true, target, op1, elt);
25332 /* Expand an expression EXP that calls a built-in function,
25333 with result going to TARGET if that's convenient
25334 (and in mode MODE if that's convenient).
25335 SUBTARGET may be used as the target for computing one of EXP's operands.
25336 IGNORE is nonzero if the value is to be ignored. */
25339 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25340 enum machine_mode mode ATTRIBUTE_UNUSED,
25341 int ignore ATTRIBUTE_UNUSED)
25343 const struct builtin_description *d;
25345 enum insn_code icode;
25346 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25347 tree arg0, arg1, arg2;
25348 rtx op0, op1, op2, pat;
25349 enum machine_mode mode0, mode1, mode2;
25350 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25352 /* Determine whether the builtin function is available under the current ISA.
25353 Originally the builtin was not created if it wasn't applicable to the
25354 current ISA based on the command line switches. With function specific
25355 options, we need to check in the context of the function making the call
25356 whether it is supported. */
25357 if (ix86_builtins_isa[fcode].isa
25358 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25360 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25361 NULL, NULL, false);
25364 error ("%qE needs unknown isa option", fndecl);
25367 gcc_assert (opts != NULL);
25368 error ("%qE needs isa option %s", fndecl, opts);
25376 case IX86_BUILTIN_MASKMOVQ:
25377 case IX86_BUILTIN_MASKMOVDQU:
25378 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25379 ? CODE_FOR_mmx_maskmovq
25380 : CODE_FOR_sse2_maskmovdqu);
25381 /* Note the arg order is different from the operand order. */
25382 arg1 = CALL_EXPR_ARG (exp, 0);
25383 arg2 = CALL_EXPR_ARG (exp, 1);
25384 arg0 = CALL_EXPR_ARG (exp, 2);
25385 op0 = expand_normal (arg0);
25386 op1 = expand_normal (arg1);
25387 op2 = expand_normal (arg2);
25388 mode0 = insn_data[icode].operand[0].mode;
25389 mode1 = insn_data[icode].operand[1].mode;
25390 mode2 = insn_data[icode].operand[2].mode;
25392 op0 = force_reg (Pmode, op0);
25393 op0 = gen_rtx_MEM (mode1, op0);
25395 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25396 op0 = copy_to_mode_reg (mode0, op0);
25397 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25398 op1 = copy_to_mode_reg (mode1, op1);
25399 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25400 op2 = copy_to_mode_reg (mode2, op2);
25401 pat = GEN_FCN (icode) (op0, op1, op2);
25407 case IX86_BUILTIN_LDMXCSR:
25408 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25409 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25410 emit_move_insn (target, op0);
25411 emit_insn (gen_sse_ldmxcsr (target));
25414 case IX86_BUILTIN_STMXCSR:
25415 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25416 emit_insn (gen_sse_stmxcsr (target));
25417 return copy_to_mode_reg (SImode, target);
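      /* These two builtins back _mm_setcsr and _mm_getcsr from
	 xmmintrin.h.  An illustrative user-level fragment:

	     unsigned int csr = _mm_getcsr ();    -- stmxcsr
	     _mm_setcsr (csr | 0x8040);           -- ldmxcsr (FTZ | DAZ)

	 The virtual stack slot is used because both instructions take
	 only a memory operand.  */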
25419 case IX86_BUILTIN_CLFLUSH:
25420 arg0 = CALL_EXPR_ARG (exp, 0);
25421 op0 = expand_normal (arg0);
25422 icode = CODE_FOR_sse2_clflush;
25423 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25424 op0 = copy_to_mode_reg (Pmode, op0);
25426 emit_insn (gen_sse2_clflush (op0));
25429 case IX86_BUILTIN_MONITOR:
25430 arg0 = CALL_EXPR_ARG (exp, 0);
25431 arg1 = CALL_EXPR_ARG (exp, 1);
25432 arg2 = CALL_EXPR_ARG (exp, 2);
25433 op0 = expand_normal (arg0);
25434 op1 = expand_normal (arg1);
25435 op2 = expand_normal (arg2);
25437 op0 = copy_to_mode_reg (Pmode, op0);
25439 op1 = copy_to_mode_reg (SImode, op1);
25441 op2 = copy_to_mode_reg (SImode, op2);
25442 emit_insn (ix86_gen_monitor (op0, op1, op2));
25445 case IX86_BUILTIN_MWAIT:
25446 arg0 = CALL_EXPR_ARG (exp, 0);
25447 arg1 = CALL_EXPR_ARG (exp, 1);
25448 op0 = expand_normal (arg0);
25449 op1 = expand_normal (arg1);
25451 op0 = copy_to_mode_reg (SImode, op0);
25453 op1 = copy_to_mode_reg (SImode, op1);
25454 emit_insn (gen_sse3_mwait (op0, op1));
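      /* These expand the _mm_monitor and _mm_mwait wrappers from
	 pmmintrin.h; an illustrative (non-GCC) idle loop:

	     _mm_monitor (&flag, 0, 0);
	     if (!flag)
	       _mm_mwait (0, 0);

	 EAX holds the monitored address; ECX and EDX carry the
	 extension and hint words, which is why OP1 and OP2 are forced
	 into SImode registers above.  */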
25457 case IX86_BUILTIN_VEC_INIT_V2SI:
25458 case IX86_BUILTIN_VEC_INIT_V4HI:
25459 case IX86_BUILTIN_VEC_INIT_V8QI:
25460 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25462 case IX86_BUILTIN_VEC_EXT_V2DF:
25463 case IX86_BUILTIN_VEC_EXT_V2DI:
25464 case IX86_BUILTIN_VEC_EXT_V4SF:
25465 case IX86_BUILTIN_VEC_EXT_V4SI:
25466 case IX86_BUILTIN_VEC_EXT_V8HI:
25467 case IX86_BUILTIN_VEC_EXT_V2SI:
25468 case IX86_BUILTIN_VEC_EXT_V4HI:
25469 case IX86_BUILTIN_VEC_EXT_V16QI:
25470 return ix86_expand_vec_ext_builtin (exp, target);
25472 case IX86_BUILTIN_VEC_SET_V2DI:
25473 case IX86_BUILTIN_VEC_SET_V4SF:
25474 case IX86_BUILTIN_VEC_SET_V4SI:
25475 case IX86_BUILTIN_VEC_SET_V8HI:
25476 case IX86_BUILTIN_VEC_SET_V4HI:
25477 case IX86_BUILTIN_VEC_SET_V16QI:
25478 return ix86_expand_vec_set_builtin (exp);
25480 case IX86_BUILTIN_VEC_PERM_V2DF:
25481 case IX86_BUILTIN_VEC_PERM_V4SF:
25482 case IX86_BUILTIN_VEC_PERM_V2DI:
25483 case IX86_BUILTIN_VEC_PERM_V4SI:
25484 case IX86_BUILTIN_VEC_PERM_V8HI:
25485 case IX86_BUILTIN_VEC_PERM_V16QI:
25486 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25487 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25488 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25489 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25490 case IX86_BUILTIN_VEC_PERM_V4DF:
25491 case IX86_BUILTIN_VEC_PERM_V8SF:
25492 return ix86_expand_vec_perm_builtin (exp);
25494 case IX86_BUILTIN_INFQ:
25495 case IX86_BUILTIN_HUGE_VALQ:
25497 REAL_VALUE_TYPE inf;
25501 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25503 tmp = validize_mem (force_const_mem (mode, tmp));
25506 target = gen_reg_rtx (mode);
25508 emit_move_insn (target, tmp);
25512 case IX86_BUILTIN_LLWPCB:
25513 arg0 = CALL_EXPR_ARG (exp, 0);
25514 op0 = expand_normal (arg0);
25515 icode = CODE_FOR_lwp_llwpcb;
25516 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25517 op0 = copy_to_mode_reg (Pmode, op0);
25518 emit_insn (gen_lwp_llwpcb (op0));
25521 case IX86_BUILTIN_SLWPCB:
25522 icode = CODE_FOR_lwp_slwpcb;
25524 || !insn_data[icode].operand[0].predicate (target, Pmode))
25525 target = gen_reg_rtx (Pmode);
25526 emit_insn (gen_lwp_slwpcb (target));
25533 for (i = 0, d = bdesc_special_args;
25534 i < ARRAY_SIZE (bdesc_special_args);
25536 if (d->code == fcode)
25537 return ix86_expand_special_args_builtin (d, exp, target);
25539 for (i = 0, d = bdesc_args;
25540 i < ARRAY_SIZE (bdesc_args);
25542 if (d->code == fcode)
25545 case IX86_BUILTIN_FABSQ:
25546 case IX86_BUILTIN_COPYSIGNQ:
25548 /* Emit a normal call if SSE2 isn't available. */
25549 return expand_call (exp, target, ignore);
25551 return ix86_expand_args_builtin (d, exp, target);
25554 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25555 if (d->code == fcode)
25556 return ix86_expand_sse_comi (d, exp, target);
25558 for (i = 0, d = bdesc_pcmpestr;
25559 i < ARRAY_SIZE (bdesc_pcmpestr);
25561 if (d->code == fcode)
25562 return ix86_expand_sse_pcmpestr (d, exp, target);
25564 for (i = 0, d = bdesc_pcmpistr;
25565 i < ARRAY_SIZE (bdesc_pcmpistr);
25567 if (d->code == fcode)
25568 return ix86_expand_sse_pcmpistr (d, exp, target);
25570 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25571 if (d->code == fcode)
25572 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25573 (enum ix86_builtin_func_type)
25574 d->flag, d->comparison);
25576 gcc_unreachable ();
25579 /* Returns a function decl for a vectorized version of the builtin function
25580 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25581 if it is not available. */
25584 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25587 enum machine_mode in_mode, out_mode;
25589 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25591 if (TREE_CODE (type_out) != VECTOR_TYPE
25592 || TREE_CODE (type_in) != VECTOR_TYPE
25593 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25596 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25597 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25598 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25599 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25603 case BUILT_IN_SQRT:
25604 if (out_mode == DFmode && out_n == 2
25605 && in_mode == DFmode && in_n == 2)
25606 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25609 case BUILT_IN_SQRTF:
25610 if (out_mode == SFmode && out_n == 4
25611 && in_mode == SFmode && in_n == 4)
25612 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25615 case BUILT_IN_LRINT:
25616 if (out_mode == SImode && out_n == 4
25617 && in_mode == DFmode && in_n == 2)
25618 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25621 case BUILT_IN_LRINTF:
25622 if (out_mode == SImode && out_n == 4
25623 && in_mode == SFmode && in_n == 4)
25624 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25627 case BUILT_IN_COPYSIGN:
25628 if (out_mode == DFmode && out_n == 2
25629 && in_mode == DFmode && in_n == 2)
25630 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25633 case BUILT_IN_COPYSIGNF:
25634 if (out_mode == SFmode && out_n == 4
25635 && in_mode == SFmode && in_n == 4)
25636 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
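  /* For example (an illustrative note, not from the original source):
     a loop such as

	 for (i = 0; i < n; i++)
	   a[i] = sqrt (b[i]);

     reaches this hook as BUILT_IN_SQRT with V2DF in/out types when
     vectorizing for SSE2, and the mapping above returns
     IX86_BUILTIN_SQRTPD so the vectorizer can emit sqrtpd directly.  */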
25643 /* Dispatch to a handler for a vectorization library. */
25644 if (ix86_veclib_handler)
25645 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25651 /* Handler for an SVML-style interface to
25652 a library with vectorized intrinsics. */
25655 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25658 tree fntype, new_fndecl, args;
25661 enum machine_mode el_mode, in_mode;
25664 /* SVML is suitable for unsafe math only. */
25665 if (!flag_unsafe_math_optimizations)
25668 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25669 n = TYPE_VECTOR_SUBPARTS (type_out);
25670 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25671 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25672 if (el_mode != in_mode
25680 case BUILT_IN_LOG10:
25682 case BUILT_IN_TANH:
25684 case BUILT_IN_ATAN:
25685 case BUILT_IN_ATAN2:
25686 case BUILT_IN_ATANH:
25687 case BUILT_IN_CBRT:
25688 case BUILT_IN_SINH:
25690 case BUILT_IN_ASINH:
25691 case BUILT_IN_ASIN:
25692 case BUILT_IN_COSH:
25694 case BUILT_IN_ACOSH:
25695 case BUILT_IN_ACOS:
25696 if (el_mode != DFmode || n != 2)
25700 case BUILT_IN_EXPF:
25701 case BUILT_IN_LOGF:
25702 case BUILT_IN_LOG10F:
25703 case BUILT_IN_POWF:
25704 case BUILT_IN_TANHF:
25705 case BUILT_IN_TANF:
25706 case BUILT_IN_ATANF:
25707 case BUILT_IN_ATAN2F:
25708 case BUILT_IN_ATANHF:
25709 case BUILT_IN_CBRTF:
25710 case BUILT_IN_SINHF:
25711 case BUILT_IN_SINF:
25712 case BUILT_IN_ASINHF:
25713 case BUILT_IN_ASINF:
25714 case BUILT_IN_COSHF:
25715 case BUILT_IN_COSF:
25716 case BUILT_IN_ACOSHF:
25717 case BUILT_IN_ACOSF:
25718 if (el_mode != SFmode || n != 4)
25726 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25728 if (fn == BUILT_IN_LOGF)
25729 strcpy (name, "vmlsLn4");
25730 else if (fn == BUILT_IN_LOG)
25731 strcpy (name, "vmldLn2");
25734 sprintf (name, "vmls%s", bname+10);
25735 name[strlen (name)-1] = '4';
25738 sprintf (name, "vmld%s2", bname+10);
25740 /* Convert to uppercase. */
25744 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25745 args = TREE_CHAIN (args))
25749 fntype = build_function_type_list (type_out, type_in, NULL);
25751 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25753 /* Build a function declaration for the vectorized function. */
25754 new_fndecl = build_decl (BUILTINS_LOCATION,
25755 FUNCTION_DECL, get_identifier (name), fntype);
25756 TREE_PUBLIC (new_fndecl) = 1;
25757 DECL_EXTERNAL (new_fndecl) = 1;
25758 DECL_IS_NOVOPS (new_fndecl) = 1;
25759 TREE_READONLY (new_fndecl) = 1;
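  /* Worked example of the mangling above (derived from this code):
     for BUILT_IN_SINF, BNAME is "__builtin_sinf", so the SFmode
     branch produces "vmlssinf", the trailing character is replaced to
     give "vmlssin4", and the uppercase fixup turns it into
     "vmlsSin4"; the DFmode branch yields "vmldSin2".  */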
25764 /* Handler for an ACML-style interface to
25765 a library with vectorized intrinsics. */
25768 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25770 char name[20] = "__vr.._";
25771 tree fntype, new_fndecl, args;
25774 enum machine_mode el_mode, in_mode;
25777 /* ACML is 64-bit only and suitable for unsafe math only, as
25778 it does not correctly support parts of IEEE arithmetic with the
25779 required precision, such as denormals. */
25781 || !flag_unsafe_math_optimizations)
25784 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25785 n = TYPE_VECTOR_SUBPARTS (type_out);
25786 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25787 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25788 if (el_mode != in_mode
25798 case BUILT_IN_LOG2:
25799 case BUILT_IN_LOG10:
25802 if (el_mode != DFmode
25807 case BUILT_IN_SINF:
25808 case BUILT_IN_COSF:
25809 case BUILT_IN_EXPF:
25810 case BUILT_IN_POWF:
25811 case BUILT_IN_LOGF:
25812 case BUILT_IN_LOG2F:
25813 case BUILT_IN_LOG10F:
25816 if (el_mode != SFmode
25825 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25826 sprintf (name + 7, "%s", bname+10);
25829 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25830 args = TREE_CHAIN (args))
25834 fntype = build_function_type_list (type_out, type_in, NULL);
25836 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25838 /* Build a function declaration for the vectorized function. */
25839 new_fndecl = build_decl (BUILTINS_LOCATION,
25840 FUNCTION_DECL, get_identifier (name), fntype);
25841 TREE_PUBLIC (new_fndecl) = 1;
25842 DECL_EXTERNAL (new_fndecl) = 1;
25843 DECL_IS_NOVOPS (new_fndecl) = 1;
25844 TREE_READONLY (new_fndecl) = 1;
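  /* Worked example (an illustrative reconstruction): the "__vr.._"
     template has its two dots filled in with the mode letter and
     vector width, and the stripped builtin name is copied in at
     NAME + 7, so BUILT_IN_SINF becomes "__vrs4_sinf" and BUILT_IN_SIN
     becomes "__vrd2_sin".  */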
25850 /* Returns a decl of a function that implements conversion of an integer vector
25851 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25852 are the types involved when converting according to CODE.
25853 Return NULL_TREE if it is not available. */
25856 ix86_vectorize_builtin_conversion (unsigned int code,
25857 tree dest_type, tree src_type)
25865 switch (TYPE_MODE (src_type))
25868 switch (TYPE_MODE (dest_type))
25871 return (TYPE_UNSIGNED (src_type)
25872 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25873 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25875 return (TYPE_UNSIGNED (src_type)
25877 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25883 switch (TYPE_MODE (dest_type))
25886 return (TYPE_UNSIGNED (src_type)
25888 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25897 case FIX_TRUNC_EXPR:
25898 switch (TYPE_MODE (dest_type))
25901 switch (TYPE_MODE (src_type))
25904 return (TYPE_UNSIGNED (dest_type)
25906 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25908 return (TYPE_UNSIGNED (dest_type)
25910 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25917 switch (TYPE_MODE (src_type))
25920 return (TYPE_UNSIGNED (dest_type)
25922 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
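  /* An illustrative example (not from the original source) of a loop
     that exercises the FLOAT_EXPR mapping above; with SSE2 the
     V4SI -> V4SF case resolves to IX86_BUILTIN_CVTDQ2PS, i.e. one
     cvtdq2ps:

	 void
	 int_to_float (float *dst, const int *src, int n)
	 {
	   int i;
	   for (i = 0; i < n; i++)
	     dst[i] = (float) src[i];
	 }
  */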
25939 /* Returns a code for a target-specific builtin that implements
25940 the reciprocal of the function, or NULL_TREE if not available. */
25943 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25944 bool sqrt ATTRIBUTE_UNUSED)
25946 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25947 && flag_finite_math_only && !flag_trapping_math
25948 && flag_unsafe_math_optimizations))
25952 /* Machine dependent builtins. */
25955 /* Vectorized version of sqrt to rsqrt conversion. */
25956 case IX86_BUILTIN_SQRTPS_NR:
25957 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25963 /* Normal builtins. */
25966 /* Sqrt to rsqrt conversion. */
25967 case BUILT_IN_SQRTF:
25968 return ix86_builtins[IX86_BUILTIN_RSQRTF];
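  /* Illustrative example (not from the original source): with
     -ffast-math and SSE math, the kernel

	 float inv_norm (float x) { return 1.0f / __builtin_sqrtf (x); }

     is rewritten via this hook to use rsqrtss plus one Newton-Raphson
     step instead of sqrtss followed by divss.  */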
25975 /* Helper for avx_vpermilps256_operand et al. This is also used by
25976 the expansion functions to turn the parallel back into a mask.
25977 The return value is 0 for no match and the imm8+1 for a match. */
25980 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25982 unsigned i, nelt = GET_MODE_NUNITS (mode);
25984 unsigned char ipar[8];
25986 if (XVECLEN (par, 0) != (int) nelt)
25989 /* Validate that all of the elements are constants, and not totally
25990 out of range. Copy the data into an integral array to make the
25991 subsequent checks easier. */
25992 for (i = 0; i < nelt; ++i)
25994 rtx er = XVECEXP (par, 0, i);
25995 unsigned HOST_WIDE_INT ei;
25997 if (!CONST_INT_P (er))
26008 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane.  */
26010 for (i = 0; i < 2; ++i)
26014 mask |= ipar[i] << i;
26016 for (i = 2; i < 4; ++i)
26020 mask |= (ipar[i] - 2) << i;
26025 /* In the 256-bit SFmode case, we have full freedom of movement
26026 within the low 128-bit lane, but the high 128-bit lane must
26027 mirror the exact same pattern. */
26028 for (i = 0; i < 4; ++i)
26029 if (ipar[i] + 4 != ipar[i + 4])
26036 /* In the 128-bit case, we've full freedom in the placement of
26037 the elements from the source operand. */
26038 for (i = 0; i < nelt; ++i)
26039 mask |= ipar[i] << (i * (nelt / 2));
26043 gcc_unreachable ();
26046 /* Make sure success has a non-zero value by adding one. */
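/* Worked example (V4DFmode): the parallel [1 0 3 2] swaps the pair
   within each 128-bit lane.  The low lane contributes 1 << 0, the
   high lane contributes (3 - 2) << 2, so IMM8 is 0b0101 == 5 and the
   function returns 6.  */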
26050 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26051 the expansion functions to turn the parallel back into a mask.
26052 The return value is 0 for no match and the imm8+1 for a match. */
26055 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26057 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26059 unsigned char ipar[8];
26061 if (XVECLEN (par, 0) != (int) nelt)
26064 /* Validate that all of the elements are constants, and not totally
26065 out of range. Copy the data into an integral array to make the
26066 subsequent checks easier. */
26067 for (i = 0; i < nelt; ++i)
26069 rtx er = XVECEXP (par, 0, i);
26070 unsigned HOST_WIDE_INT ei;
26072 if (!CONST_INT_P (er))
26075 if (ei >= 2 * nelt)
26080 /* Validate that each half of the permute selects consecutive elements, i.e. a complete half of some source operand. */
26081 for (i = 0; i < nelt2 - 1; ++i)
26082 if (ipar[i] + 1 != ipar[i + 1])
26084 for (i = nelt2; i < nelt - 1; ++i)
26085 if (ipar[i] + 1 != ipar[i + 1])
26088 /* Reconstruct the mask. */
26089 for (i = 0; i < 2; ++i)
26091 unsigned e = ipar[i * nelt2];
26095 mask |= e << (i * 4);
26098 /* Make sure success has a non-zero value by adding one. */
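/* Worked example (V8SFmode, NELT2 == 4): selecting elements 4..7
   followed by 8..11 takes the high half of operand 0 and the low half
   of operand 1.  The first half yields 4/4 == 1, the second 8/4 == 2,
   so IMM8 is 1 | (2 << 4) == 0x21 and the function returns 0x22.  */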
26103 /* Store OPERAND to memory after reload is completed. This means
26104 that we can't easily use assign_stack_local. */
26106 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26110 gcc_assert (reload_completed);
26111 if (ix86_using_red_zone ())
26113 result = gen_rtx_MEM (mode,
26114 gen_rtx_PLUS (Pmode,
26116 GEN_INT (-RED_ZONE_SIZE)));
26117 emit_move_insn (result, operand);
26119 else if (TARGET_64BIT)
26125 operand = gen_lowpart (DImode, operand);
26129 gen_rtx_SET (VOIDmode,
26130 gen_rtx_MEM (DImode,
26131 gen_rtx_PRE_DEC (DImode,
26132 stack_pointer_rtx)),
26136 gcc_unreachable ();
26138 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26147 split_di (&operand, 1, operands, operands + 1);
26149 gen_rtx_SET (VOIDmode,
26150 gen_rtx_MEM (SImode,
26151 gen_rtx_PRE_DEC (Pmode,
26152 stack_pointer_rtx)),
26155 gen_rtx_SET (VOIDmode,
26156 gen_rtx_MEM (SImode,
26157 gen_rtx_PRE_DEC (Pmode,
26158 stack_pointer_rtx)),
26163 /* Store HImodes as SImodes. */
26164 operand = gen_lowpart (SImode, operand);
26168 gen_rtx_SET (VOIDmode,
26169 gen_rtx_MEM (GET_MODE (operand),
26170 gen_rtx_PRE_DEC (SImode,
26171 stack_pointer_rtx)),
26175 gcc_unreachable ();
26177 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26182 /* Free the operand from memory. */
26184 ix86_free_from_memory (enum machine_mode mode)
26186 if (!ix86_using_red_zone ())
26190 if (mode == DImode || TARGET_64BIT)
26194 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26195 to a pop or add instruction if registers are available. */
26196 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26197 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26202 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26203 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26205 static const reg_class_t *
26206 i386_ira_cover_classes (void)
26208 static const reg_class_t sse_fpmath_classes[] = {
26209 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26211 static const reg_class_t no_sse_fpmath_classes[] = {
26212 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26215 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26218 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26219 QImode must go into class Q_REGS.
26220 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26221 movdf to do mem-to-mem moves through integer regs. */
26223 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26225 enum machine_mode mode = GET_MODE (x);
26227 /* We're only allowed to return a subclass of CLASS. Many of the
26228 following checks fail for NO_REGS, so eliminate that early. */
26229 if (regclass == NO_REGS)
26232 /* All classes can load zeros. */
26233 if (x == CONST0_RTX (mode))
26236 /* Force constants into memory if we are loading a (nonzero) constant into
26237 an MMX or SSE register. This is because there are no MMX/SSE instructions
26238 to load from a constant. */
26240 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26243 /* Prefer SSE regs only, if we can use them for math. */
26244 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26245 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26247 /* Floating-point constants need more complex checks. */
26248 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26250 /* General regs can load everything. */
26251 if (reg_class_subset_p (regclass, GENERAL_REGS))
26254 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26255 zero above. We only want to wind up preferring 80387 registers if
26256 we plan on doing computation with them. */
26258 && standard_80387_constant_p (x))
26260 /* Limit class to non-sse. */
26261 if (regclass == FLOAT_SSE_REGS)
26263 if (regclass == FP_TOP_SSE_REGS)
26265 if (regclass == FP_SECOND_SSE_REGS)
26266 return FP_SECOND_REG;
26267 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26274 /* Generally when we see PLUS here, it's the function invariant
26275 (plus soft-fp const_int), which can only be computed into general regs.  */
26277 if (GET_CODE (x) == PLUS)
26278 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26280 /* QImode constants are easy to load, but non-constant QImode data
26281 must go into Q_REGS. */
26282 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26284 if (reg_class_subset_p (regclass, Q_REGS))
26286 if (reg_class_subset_p (Q_REGS, regclass))
26294 /* Discourage putting floating-point values in SSE registers unless
26295 SSE math is being used, and likewise for the 387 registers. */
26297 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26299 enum machine_mode mode = GET_MODE (x);
26301 /* Restrict the output reload class to the register bank that we are doing
26302 math on. If we would like not to return a subset of CLASS, reject this
26303 alternative: if reload cannot do this, it will still use its choice. */
26305 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26306 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26308 if (X87_FLOAT_MODE_P (mode))
26310 if (regclass == FP_TOP_SSE_REGS)
26312 else if (regclass == FP_SECOND_SSE_REGS)
26313 return FP_SECOND_REG;
26315 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26322 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26323 enum machine_mode mode,
26324 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26326 /* QImode spills from non-QI registers require
26327 an intermediate register on 32-bit targets. */
26328 if (!in_p && mode == QImode && !TARGET_64BIT
26329 && (rclass == GENERAL_REGS
26330 || rclass == LEGACY_REGS
26331 || rclass == INDEX_REGS))
26340 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26341 regno = true_regnum (x);
26343 /* Return Q_REGS if the operand is in memory. */
26351 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26354 ix86_class_likely_spilled_p (reg_class_t rclass)
26365 case SSE_FIRST_REG:
26367 case FP_SECOND_REG:
26377 /* If we are copying between general and FP registers, we need a memory
26378 location. The same is true for SSE and MMX registers.
26380 To optimize register_move_cost performance, allow inline variant.
26382 The macro can't work reliably when one of the CLASSES is a class containing
26383 registers from multiple units (SSE, MMX, integer). We avoid this by never
26384 combining those units in a single alternative in the machine description.
26385 Ensure that this constraint holds to avoid unexpected surprises.
26387 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26388 enforce these sanity checks. */
26391 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26392 enum machine_mode mode, int strict)
26394 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26395 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26396 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26397 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26398 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26399 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26401 gcc_assert (!strict);
26405 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26408 /* ??? This is a lie. We do have moves between mmx/general, and between
26409 mmx/sse2. But by saying we need secondary memory we discourage the
26410 register allocator from using the mmx registers unless needed. */
26411 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26414 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26416 /* SSE1 doesn't have any direct moves from other classes. */
26420 /* If the target says that inter-unit moves are more expensive
26421 than moving through memory, then don't generate them. */
26422 if (!TARGET_INTER_UNIT_MOVES)
26425 /* Between SSE and general, we have moves no larger than word size. */
26426 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26434 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26435 enum machine_mode mode, int strict)
26437 return inline_secondary_memory_needed (class1, class2, mode, strict);
26440 /* Return true if the registers in CLASS cannot represent the change from
26441 modes FROM to TO. */
26444 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26445 enum reg_class regclass)
26450 /* x87 registers can't do subreg at all, as all values are reformatted
26451 to extended precision. */
26452 if (MAYBE_FLOAT_CLASS_P (regclass))
26455 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26457 /* Vector registers do not support QI or HImode loads. If we don't
26458 disallow a change to these modes, reload will assume it's ok to
26459 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26460 the vec_dupv4hi pattern. */
26461 if (GET_MODE_SIZE (from) < 4)
26464 /* Vector registers do not support subreg with nonzero offsets, which
26465 are otherwise valid for integer registers. Since we can't see
26466 whether we have a nonzero offset from here, prohibit all
26467 nonparadoxical subregs changing size. */
26468 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26475 /* Return the cost of moving data of mode M between a
26476 register and memory. A value of 2 is the default; this cost is
26477 relative to those in `REGISTER_MOVE_COST'.
26479 This function is used extensively by register_move_cost, which is used to
26480 build tables at startup. Make it inline in this case.
26481 When IN is 2, return the maximum of the in and out move costs.
26483 If moving between registers and memory is more expensive than
26484 between two registers, you should define this macro to express the
26487 Also model the increased cost of moving QImode registers in non-Q_REGS classes.  */
26491 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26495 if (FLOAT_CLASS_P (regclass))
26513 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26514 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26516 if (SSE_CLASS_P (regclass))
26519 switch (GET_MODE_SIZE (mode))
26534 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26535 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26537 if (MMX_CLASS_P (regclass))
26540 switch (GET_MODE_SIZE (mode))
26552 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26553 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26555 switch (GET_MODE_SIZE (mode))
26558 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26561 return ix86_cost->int_store[0];
26562 if (TARGET_PARTIAL_REG_DEPENDENCY
26563 && optimize_function_for_speed_p (cfun))
26564 cost = ix86_cost->movzbl_load;
26566 cost = ix86_cost->int_load[0];
26568 return MAX (cost, ix86_cost->int_store[0]);
26574 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26576 return ix86_cost->movzbl_load;
26578 return ix86_cost->int_store[0] + 4;
26583 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26584 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26586 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
26587 if (mode == TFmode)
26590 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26592 cost = ix86_cost->int_load[2];
26594 cost = ix86_cost->int_store[2];
26595 return (cost * (((int) GET_MODE_SIZE (mode)
26596 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26601 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26604 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26608 /* Return the cost of moving data from a register in class CLASS1 to
26609 one in class CLASS2.
26611 It is not required that the cost always equal 2 when FROM is the same as TO;
26612 on some machines it is expensive to move between registers if they are not
26613 general registers. */
26616 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26617 reg_class_t class2_i)
26619 enum reg_class class1 = (enum reg_class) class1_i;
26620 enum reg_class class2 = (enum reg_class) class2_i;
26622 /* In case we require secondary memory, compute the cost of the store followed
26623 by the load. In order to avoid bad register allocation choices, we need
26624 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26626 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26630 cost += inline_memory_move_cost (mode, class1, 2);
26631 cost += inline_memory_move_cost (mode, class2, 2);
26633 /* When copying from a general-purpose register we may emit multiple
26634 stores followed by a single load, causing a memory-size-mismatch stall.
26635 Count this as an arbitrarily high cost of 20. */
26636 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26639 /* In the case of FP/MMX moves, the registers actually overlap, and we
26640 have to switch modes in order to treat them differently. */
26641 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26642 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26648 /* Moves between SSE/MMX and integer unit are expensive. */
26649 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26650 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26652 /* ??? By keeping the returned value relatively high, we limit the number
26653 of moves between integer and MMX/SSE registers for all targets.
26654 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
26655 where integer modes in MMX/SSE registers are not tieable
26656 because of missing QImode and HImode moves to, from or between
26657 MMX/SSE registers. */
26658 return MAX (8, ix86_cost->mmxsse_to_integer);
26660 if (MAYBE_FLOAT_CLASS_P (class1))
26661 return ix86_cost->fp_move;
26662 if (MAYBE_SSE_CLASS_P (class1))
26663 return ix86_cost->sse_move;
26664 if (MAYBE_MMX_CLASS_P (class1))
26665 return ix86_cost->mmx_move;
26669 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26672 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26674 /* The flags register, and only the flags register, can hold CCmode values. */
26675 if (CC_REGNO_P (regno))
26676 return GET_MODE_CLASS (mode) == MODE_CC;
26677 if (GET_MODE_CLASS (mode) == MODE_CC
26678 || GET_MODE_CLASS (mode) == MODE_RANDOM
26679 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26681 if (FP_REGNO_P (regno))
26682 return VALID_FP_MODE_P (mode);
26683 if (SSE_REGNO_P (regno))
26685 /* We implement the move patterns for all vector modes into and
26686 out of SSE registers, even when no operation instructions
26687 are available. OImode move is available only when AVX is
26689 return ((TARGET_AVX && mode == OImode)
26690 || VALID_AVX256_REG_MODE (mode)
26691 || VALID_SSE_REG_MODE (mode)
26692 || VALID_SSE2_REG_MODE (mode)
26693 || VALID_MMX_REG_MODE (mode)
26694 || VALID_MMX_REG_MODE_3DNOW (mode));
26696 if (MMX_REGNO_P (regno))
26698 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26699 so if the register is available at all, then we can move data of
26700 the given mode into or out of it. */
26701 return (VALID_MMX_REG_MODE (mode)
26702 || VALID_MMX_REG_MODE_3DNOW (mode));
26705 if (mode == QImode)
26707 /* Take care with QImode values - they can be in non-QI regs,
26708 but then they do cause partial-register stalls. */
26709 if (regno <= BX_REG || TARGET_64BIT)
26711 if (!TARGET_PARTIAL_REG_STALL)
26713 return reload_in_progress || reload_completed;
26715 /* We handle both integer and floats in the general purpose registers. */
26716 else if (VALID_INT_MODE_P (mode))
26718 else if (VALID_FP_MODE_P (mode))
26720 else if (VALID_DFP_MODE_P (mode))
26722 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26723 on to use that value in smaller contexts, this can easily force a
26724 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26725 supporting DImode, allow it. */
26726 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26732 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26733 tieable integer mode. */
26736 ix86_tieable_integer_mode_p (enum machine_mode mode)
26745 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26748 return TARGET_64BIT;
26755 /* Return true if MODE1 is accessible in a register that can hold MODE2
26756 without copying. That is, all register classes that can hold MODE2
26757 can also hold MODE1. */
26760 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26762 if (mode1 == mode2)
26765 if (ix86_tieable_integer_mode_p (mode1)
26766 && ix86_tieable_integer_mode_p (mode2))
26769 /* MODE2 being XFmode implies fp stack or general regs, which means we
26770 can tie any smaller floating point modes to it. Note that we do not
26771 tie this with TFmode. */
26772 if (mode2 == XFmode)
26773 return mode1 == SFmode || mode1 == DFmode;
26775 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26776 that we can tie it with SFmode. */
26777 if (mode2 == DFmode)
26778 return mode1 == SFmode;
26780 /* If MODE2 is only appropriate for an SSE register, then tie with
26781 any other mode acceptable to SSE registers. */
26782 if (GET_MODE_SIZE (mode2) == 16
26783 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26784 return (GET_MODE_SIZE (mode1) == 16
26785 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26787 /* If MODE2 is appropriate for an MMX register, then tie
26788 with any other mode acceptable to MMX registers. */
26789 if (GET_MODE_SIZE (mode2) == 8
26790 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26791 return (GET_MODE_SIZE (mode1) == 8
26792 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26797 /* Compute a (partial) cost for rtx X. Return true if the complete
26798 cost has been computed, and false if subexpressions should be
26799 scanned. In either case, *TOTAL contains the cost result. */
26802 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26804 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26805 enum machine_mode mode = GET_MODE (x);
26806 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26814 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26816 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26818 else if (flag_pic && SYMBOLIC_CONST (x)
26820 || (GET_CODE (x) != LABEL_REF
26821 && (GET_CODE (x) != SYMBOL_REF
26822 || !SYMBOL_REF_LOCAL_P (x)))))
26829 if (mode == VOIDmode)
26832 switch (standard_80387_constant_p (x))
26837 default: /* Other constants */
26842 /* Start with (MEM (SYMBOL_REF)), since that's where
26843 it'll probably end up. Add a penalty for size. */
26844 *total = (COSTS_N_INSNS (1)
26845 + (flag_pic != 0 && !TARGET_64BIT)
26846 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26852 /* The zero extension is often completely free on x86_64, so make
26853 it as cheap as possible. */
26854 if (TARGET_64BIT && mode == DImode
26855 && GET_MODE (XEXP (x, 0)) == SImode)
26857 else if (TARGET_ZERO_EXTEND_WITH_AND)
26858 *total = cost->add;
26860 *total = cost->movzx;
26864 *total = cost->movsx;
26868 if (CONST_INT_P (XEXP (x, 1))
26869 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26871 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26874 *total = cost->add;
26877 if ((value == 2 || value == 3)
26878 && cost->lea <= cost->shift_const)
26880 *total = cost->lea;
26890 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26892 if (CONST_INT_P (XEXP (x, 1)))
26894 if (INTVAL (XEXP (x, 1)) > 32)
26895 *total = cost->shift_const + COSTS_N_INSNS (2);
26897 *total = cost->shift_const * 2;
26901 if (GET_CODE (XEXP (x, 1)) == AND)
26902 *total = cost->shift_var * 2;
26904 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26909 if (CONST_INT_P (XEXP (x, 1)))
26910 *total = cost->shift_const;
26912 *total = cost->shift_var;
26917 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26919 /* ??? SSE scalar cost should be used here. */
26920 *total = cost->fmul;
26923 else if (X87_FLOAT_MODE_P (mode))
26925 *total = cost->fmul;
26928 else if (FLOAT_MODE_P (mode))
26930 /* ??? SSE vector cost should be used here. */
26931 *total = cost->fmul;
26936 rtx op0 = XEXP (x, 0);
26937 rtx op1 = XEXP (x, 1);
26939 if (CONST_INT_P (XEXP (x, 1)))
26941 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26942 for (nbits = 0; value != 0; value &= value - 1)
26946 /* This is arbitrary. */
26949 /* Compute costs correctly for widening multiplication. */
26950 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26951 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26952 == GET_MODE_SIZE (mode))
26954 int is_mulwiden = 0;
26955 enum machine_mode inner_mode = GET_MODE (op0);
26957 if (GET_CODE (op0) == GET_CODE (op1))
26958 is_mulwiden = 1, op1 = XEXP (op1, 0);
26959 else if (CONST_INT_P (op1))
26961 if (GET_CODE (op0) == SIGN_EXTEND)
26962 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26965 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26969 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26972 *total = (cost->mult_init[MODE_INDEX (mode)]
26973 + nbits * cost->mult_bit
26974 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26983 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26984 /* ??? SSE cost should be used here. */
26985 *total = cost->fdiv;
26986 else if (X87_FLOAT_MODE_P (mode))
26987 *total = cost->fdiv;
26988 else if (FLOAT_MODE_P (mode))
26989 /* ??? SSE vector cost should be used here. */
26990 *total = cost->fdiv;
26992 *total = cost->divide[MODE_INDEX (mode)];
26996 if (GET_MODE_CLASS (mode) == MODE_INT
26997 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26999 if (GET_CODE (XEXP (x, 0)) == PLUS
27000 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27001 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27002 && CONSTANT_P (XEXP (x, 1)))
27004 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27005 if (val == 2 || val == 4 || val == 8)
27007 *total = cost->lea;
27008 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27009 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27010 outer_code, speed);
27011 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27015 else if (GET_CODE (XEXP (x, 0)) == MULT
27016 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27018 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27019 if (val == 2 || val == 4 || val == 8)
27021 *total = cost->lea;
27022 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27023 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27027 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27029 *total = cost->lea;
27030 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27031 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27032 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27039 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27041 /* ??? SSE cost should be used here. */
27042 *total = cost->fadd;
27045 else if (X87_FLOAT_MODE_P (mode))
27047 *total = cost->fadd;
27050 else if (FLOAT_MODE_P (mode))
27052 /* ??? SSE vector cost should be used here. */
27053 *total = cost->fadd;
27061 if (!TARGET_64BIT && mode == DImode)
27063 *total = (cost->add * 2
27064 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27065 << (GET_MODE (XEXP (x, 0)) != DImode))
27066 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27067 << (GET_MODE (XEXP (x, 1)) != DImode)));
27073 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27075 /* ??? SSE cost should be used here. */
27076 *total = cost->fchs;
27079 else if (X87_FLOAT_MODE_P (mode))
27081 *total = cost->fchs;
27084 else if (FLOAT_MODE_P (mode))
27086 /* ??? SSE vector cost should be used here. */
27087 *total = cost->fchs;
27093 if (!TARGET_64BIT && mode == DImode)
27094 *total = cost->add * 2;
27096 *total = cost->add;
27100 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27101 && XEXP (XEXP (x, 0), 1) == const1_rtx
27102 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27103 && XEXP (x, 1) == const0_rtx)
27105 /* This kind of construct is implemented using test[bwl].
27106 Treat it as if we had an AND. */
27107 *total = (cost->add
27108 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27109 + rtx_cost (const1_rtx, outer_code, speed));
27115 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27120 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27121 /* ??? SSE cost should be used here. */
27122 *total = cost->fabs;
27123 else if (X87_FLOAT_MODE_P (mode))
27124 *total = cost->fabs;
27125 else if (FLOAT_MODE_P (mode))
27126 /* ??? SSE vector cost should be used here. */
27127 *total = cost->fabs;
27131 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27132 /* ??? SSE cost should be used here. */
27133 *total = cost->fsqrt;
27134 else if (X87_FLOAT_MODE_P (mode))
27135 *total = cost->fsqrt;
27136 else if (FLOAT_MODE_P (mode))
27137 /* ??? SSE vector cost should be used here. */
27138 *total = cost->fsqrt;
27142 if (XINT (x, 1) == UNSPEC_TP)
27149 case VEC_DUPLICATE:
27150 /* ??? Assume all of these vector manipulation patterns are
27151 recognizable, in which case they all pretty much have the same cost.  */
27153 *total = COSTS_N_INSNS (1);
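      /* Worked examples of the cost model above (illustrative): the
	 address (plus (plus (mult reg 4) reg) (const_int 16)) is
	 priced as a single lea plus the costs of its leaves; a shift
	 by a constant costs shift_const; and a DImode shift on a
	 32-bit target pays for the full two-register sequence.  */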
27163 static int current_machopic_label_num;
27165 /* Given a symbol name and its associated stub, write out the
27166 definition of the stub. */
27169 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27171 unsigned int length;
27172 char *binder_name, *symbol_name, lazy_ptr_name[32];
27173 int label = ++current_machopic_label_num;
27175 /* For 64-bit we shouldn't get here. */
27176 gcc_assert (!TARGET_64BIT);
27178 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27179 symb = targetm.strip_name_encoding (symb);
27181 length = strlen (stub);
27182 binder_name = XALLOCAVEC (char, length + 32);
27183 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27185 length = strlen (symb);
27186 symbol_name = XALLOCAVEC (char, length + 32);
27187 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27189 sprintf (lazy_ptr_name, "L%d$lz", label);
27192 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27194 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27196 fprintf (file, "%s:\n", stub);
27197 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27201 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27202 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27203 fprintf (file, "\tjmp\t*%%edx\n");
27206 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27208 fprintf (file, "%s:\n", binder_name);
27212 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27213 fputs ("\tpushl\t%eax\n", file);
27216 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27218 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27220 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27221 fprintf (file, "%s:\n", lazy_ptr_name);
27222 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27223 fprintf (file, ASM_LONG "%s\n", binder_name);
27225 #endif /* TARGET_MACHO */
27227 /* Order the registers for register allocator. */
27230 x86_order_regs_for_local_alloc (void)
27235 /* First allocate the local general purpose registers. */
27236 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27237 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27238 reg_alloc_order [pos++] = i;
27240 /* Global general purpose registers. */
27241 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27242 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27243 reg_alloc_order [pos++] = i;
27245 /* x87 registers come first in case we are doing FP math using them.  */
27247 if (!TARGET_SSE_MATH)
27248 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27249 reg_alloc_order [pos++] = i;
27251 /* SSE registers. */
27252 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27253 reg_alloc_order [pos++] = i;
27254 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27255 reg_alloc_order [pos++] = i;
27257 /* x87 registers. */
27258 if (TARGET_SSE_MATH)
27259 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27260 reg_alloc_order [pos++] = i;
27262 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27263 reg_alloc_order [pos++] = i;
27265 /* Initialize the rest of the array, as we do not allocate some registers at all.  */
27267 while (pos < FIRST_PSEUDO_REGISTER)
27268 reg_alloc_order [pos++] = 0;
27271 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
27272 struct attribute_spec.handler. */
27274 ix86_handle_abi_attribute (tree *node, tree name,
27275 tree args ATTRIBUTE_UNUSED,
27276 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27278 if (TREE_CODE (*node) != FUNCTION_TYPE
27279 && TREE_CODE (*node) != METHOD_TYPE
27280 && TREE_CODE (*node) != FIELD_DECL
27281 && TREE_CODE (*node) != TYPE_DECL)
27283 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27285 *no_add_attrs = true;
27290 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27292 *no_add_attrs = true;
27296 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27297 if (is_attribute_p ("ms_abi", name))
27299 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27301 error ("ms_abi and sysv_abi attributes are not compatible");
27306 else if (is_attribute_p ("sysv_abi", name))
27308 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27310 error ("ms_abi and sysv_abi attributes are not compatible");
27319 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27320 struct attribute_spec.handler. */
27322 ix86_handle_struct_attribute (tree *node, tree name,
27323 tree args ATTRIBUTE_UNUSED,
27324 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27327 if (DECL_P (*node))
27329 if (TREE_CODE (*node) == TYPE_DECL)
27330 type = &TREE_TYPE (*node);
27335 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27336 || TREE_CODE (*type) == UNION_TYPE)))
27338 warning (OPT_Wattributes, "%qE attribute ignored",
27340 *no_add_attrs = true;
27343 else if ((is_attribute_p ("ms_struct", name)
27344 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27345 || ((is_attribute_p ("gcc_struct", name)
27346 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27348 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27350 *no_add_attrs = true;
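/* Illustrative usage (not from the original source) of the layout
   attributes handled above; combining both on one type draws the
   "incompatible attribute" warning:

       struct __attribute__ ((ms_struct)) s1 { char c; long long f : 40; };
       struct __attribute__ ((gcc_struct)) s2 { char c; long long f : 40; };
*/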
27357 ix86_handle_fndecl_attribute (tree *node, tree name,
27358 tree args ATTRIBUTE_UNUSED,
27359 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27361 if (TREE_CODE (*node) != FUNCTION_DECL)
27363 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27365 *no_add_attrs = true;
27371 ix86_ms_bitfield_layout_p (const_tree record_type)
27373 return ((TARGET_MS_BITFIELD_LAYOUT
27374 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27375 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27378 /* Returns an expression indicating where the this parameter is
27379 located on entry to the FUNCTION. */
27382 x86_this_parameter (tree function)
27384 tree type = TREE_TYPE (function);
27385 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27390 const int *parm_regs;
27392 if (ix86_function_type_abi (type) == MS_ABI)
27393 parm_regs = x86_64_ms_abi_int_parameter_registers;
27395 parm_regs = x86_64_int_parameter_registers;
27396 return gen_rtx_REG (DImode, parm_regs[aggr]);
27399 nregs = ix86_function_regparm (type, function);
27401 if (nregs > 0 && !stdarg_p (type))
27405 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27406 regno = aggr ? DX_REG : CX_REG;
27407 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27411 return gen_rtx_MEM (SImode,
27412 plus_constant (stack_pointer_rtx, 4));
27421 return gen_rtx_MEM (SImode,
27422 plus_constant (stack_pointer_rtx, 4));
27425 return gen_rtx_REG (SImode, regno);
27428 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
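/* To summarize the cases above (an illustrative recap): for the
   64-bit ABIs THIS arrives in the first integer parameter register,
   or the second when an aggregate-return pointer occupies the first;
   for 32-bit fastcall it is in %ecx (%edx with an aggregate return),
   for thiscall in %ecx, and otherwise it lives on the stack at
   4(%esp), pushed out to 8(%esp) by a hidden return pointer.  */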
27431 /* Determine whether x86_output_mi_thunk can succeed. */
27434 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27435 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27436 HOST_WIDE_INT vcall_offset, const_tree function)
27438 /* 64-bit can handle anything. */
27442 /* For 32-bit, everything's fine if we have one free register. */
27443 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27446 /* Need a free register for vcall_offset. */
27450 /* Need a free register for GOT references. */
27451 if (flag_pic && !targetm.binds_local_p (function))
27454 /* Otherwise ok. */
27458 /* Output the assembler code for a thunk function. THUNK_DECL is the
27459 declaration for the thunk function itself, FUNCTION is the decl for
27460 the target function. DELTA is an immediate constant offset to be
27461 added to THIS. If VCALL_OFFSET is nonzero, the word at
27462 *(*this + vcall_offset) should be added to THIS. */
27465 x86_output_mi_thunk (FILE *file,
27466 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27467 HOST_WIDE_INT vcall_offset, tree function)
27470 rtx this_param = x86_this_parameter (function);
27473 /* Make sure unwind info is emitted for the thunk if needed. */
27474 final_start_function (emit_barrier (), file, 1);
27476 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27477 pull it in now and let DELTA benefit. */
27478 if (REG_P (this_param))
27479 this_reg = this_param;
27480 else if (vcall_offset)
27482 /* Put the this parameter into %eax. */
27483 xops[0] = this_param;
27484 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27485 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27488 this_reg = NULL_RTX;
27490 /* Adjust the this parameter by a fixed constant. */
27493 xops[0] = GEN_INT (delta);
27494 xops[1] = this_reg ? this_reg : this_param;
27497 if (!x86_64_general_operand (xops[0], DImode))
27499 tmp = gen_rtx_REG (DImode, R10_REG);
27501 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27503 xops[1] = this_param;
27505 if (x86_maybe_negate_const_int (&xops[0], DImode))
27506 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27508 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27510 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27511 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27513 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27516 /* Adjust the this parameter by a value stored in the vtable. */
27520 tmp = gen_rtx_REG (DImode, R10_REG);
27523 int tmp_regno = CX_REG;
27524 if (lookup_attribute ("fastcall",
27525 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27526 || lookup_attribute ("thiscall",
27527 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27528 tmp_regno = AX_REG;
27529 tmp = gen_rtx_REG (SImode, tmp_regno);
27532 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27534 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27536 /* Adjust the this parameter. */
27537 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27538 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27540 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27541 xops[0] = GEN_INT (vcall_offset);
27543 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27544 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27546 xops[1] = this_reg;
27547 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27550 /* If necessary, drop THIS back to its stack slot. */
27551 if (this_reg && this_reg != this_param)
27553 xops[0] = this_reg;
27554 xops[1] = this_param;
27555 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27558 xops[0] = XEXP (DECL_RTL (function), 0);
27561 if (!flag_pic || targetm.binds_local_p (function))
27562 output_asm_insn ("jmp\t%P0", xops);
27563 /* All thunks should be in the same object as their target,
27564 and thus binds_local_p should be true. */
27565 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27566 gcc_unreachable ();
27569 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27570 tmp = gen_rtx_CONST (Pmode, tmp);
27571 tmp = gen_rtx_MEM (QImode, tmp);
27573 output_asm_insn ("jmp\t%A0", xops);
27578 if (!flag_pic || targetm.binds_local_p (function))
27579 output_asm_insn ("jmp\t%P0", xops);
27584 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27585 if (TARGET_MACHO_BRANCH_ISLANDS)
27586 sym_ref = (gen_rtx_SYMBOL_REF
27588 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27589 tmp = gen_rtx_MEM (QImode, sym_ref);
27591 output_asm_insn ("jmp\t%0", xops);
27594 #endif /* TARGET_MACHO */
27596 tmp = gen_rtx_REG (SImode, CX_REG);
27597 output_set_got (tmp, NULL_RTX);
27600 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27601 output_asm_insn ("jmp\t{*}%1", xops);
27604 final_end_function ();
27608 x86_file_start (void)
27610 default_file_start ();
27612 darwin_file_start ();
27614 if (X86_FILE_START_VERSION_DIRECTIVE)
27615 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27616 if (X86_FILE_START_FLTUSED)
27617 fputs ("\t.global\t__fltused\n", asm_out_file);
27618 if (ix86_asm_dialect == ASM_INTEL)
27619 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27623 x86_field_alignment (tree field, int computed)
27625 enum machine_mode mode;
27626 tree type = TREE_TYPE (field);
27628 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27630 mode = TYPE_MODE (strip_array_types (type));
27631 if (mode == DFmode || mode == DCmode
27632 || GET_MODE_CLASS (mode) == MODE_INT
27633 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27634 return MIN (32, computed);
27638 /* Output assembler code to FILE to increment profiler label # LABELNO
27639 for profiling a function entry. */
27641 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27643 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE : MCOUNT_NAME);
27648 #ifndef NO_PROFILE_COUNTERS
27649 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27652 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27653 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27655 fprintf (file, "\tcall\t%s\n", mcount_name);
27659 #ifndef NO_PROFILE_COUNTERS
27660 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27663 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27667 #ifndef NO_PROFILE_COUNTERS
27668 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27671 fprintf (file, "\tcall\t%s\n", mcount_name);
27675 /* We don't have exact information about the insn sizes, but we may assume
27676 quite safely that we are informed about all 1 byte insns and memory
27677 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
27681 min_insn_size (rtx insn)
27685 if (!INSN_P (insn) || !active_insn_p (insn))
27688 /* Discard alignments we've emitted and jump instructions. */
27689 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27690 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27692 if (JUMP_TABLE_DATA_P (insn))
27695 /* Important case - calls are always 5 bytes.
27696 It is common to have many calls in a row. */
27698 && symbolic_reference_mentioned_p (PATTERN (insn))
27699 && !SIBLING_CALL_P (insn))
27701 len = get_attr_length (insn);
27705 /* For normal instructions we rely on get_attr_length being exact,
27706 with a few exceptions. */
27707 if (!JUMP_P (insn))
27709 enum attr_type type = get_attr_type (insn);
27714 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27715 || asm_noperands (PATTERN (insn)) >= 0)
27722 /* Otherwise trust get_attr_length. */
27726 l = get_attr_length_address (insn);
27727 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27736 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27738 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
27742 ix86_avoid_jump_mispredicts (void)
27744 rtx insn, start = get_insns ();
27745 int nbytes = 0, njumps = 0;
27748 /* Look for all minimal intervals of instructions containing 4 jumps.
27749 The intervals are bounded by START and INSN. NBYTES is the total
27750 size of instructions in the interval including INSN and not including
27751 START. When NBYTES is smaller than 16 bytes, it is possible
27752 that START and INSN end up in the same 16-byte page.
27754 The smallest page offset at which INSN can start occurs when START
27755 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
27756 We add a p2align to the 16-byte window with max_skip 15 - NBYTES + sizeof (INSN).
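/* Worked example: with 4 jumps in NBYTES == 12 bytes and a 2-byte final
   jump INSN, INSN can begin as early as page offset 10, so a pad of
   15 - 12 + 2 == 5 bytes is enough to push it past the 16-byte boundary. */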
27758 for (insn = start; insn; insn = NEXT_INSN (insn))
27762 if (LABEL_P (insn))
27764 int align = label_to_alignment (insn);
27765 int max_skip = label_to_max_skip (insn);
27769 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27770 already in the current 16 byte page, because otherwise
27771 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27772 bytes to reach 16 byte boundary. */
27774 || (align <= 3 && max_skip != (1 << align) - 1))
27777 fprintf (dump_file, "Label %i with max_skip %i\n",
27778 INSN_UID (insn), max_skip);
27781 while (nbytes + max_skip >= 16)
27783 start = NEXT_INSN (start);
27784 if ((JUMP_P (start)
27785 && GET_CODE (PATTERN (start)) != ADDR_VEC
27786 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27788 njumps--, isjump = 1;
27791 nbytes -= min_insn_size (start);
27797 min_size = min_insn_size (insn);
27798 nbytes += min_size;
27800 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27801 INSN_UID (insn), min_size);
27803 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27804 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27812 start = NEXT_INSN (start);
27813 if ((JUMP_P (start)
27814 && GET_CODE (PATTERN (start)) != ADDR_VEC
27815 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27817 njumps--, isjump = 1;
27820 nbytes -= min_insn_size (start);
27822 gcc_assert (njumps >= 0);
27824 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27825 INSN_UID (start), INSN_UID (insn), nbytes);
27827 if (njumps == 3 && isjump && nbytes < 16)
27829 int padsize = 15 - nbytes + min_insn_size (insn);
27832 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27833 INSN_UID (insn), padsize);
27834 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27840 /* AMD Athlon works faster
27841 when RET is not the destination of a conditional jump or directly preceded
27842 by another jump instruction. We avoid the penalty by inserting a NOP just
27843 before the RET instruction in such cases. */
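/* The replacement emitted below, gen_return_internal_long, presumably
   expands to the two-byte "rep ret" form AMD recommends, so that a
   one-byte RET is never the direct target of a branch.  */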
27845 ix86_pad_returns (void)
27850 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27852 basic_block bb = e->src;
27853 rtx ret = BB_END (bb);
27855 bool replace = false;
27857 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27858 || optimize_bb_for_size_p (bb))
27860 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27861 if (active_insn_p (prev) || LABEL_P (prev))
27863 if (prev && LABEL_P (prev))
27868 FOR_EACH_EDGE (e, ei, bb->preds)
27869 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27870 && !(e->flags & EDGE_FALLTHRU))
27875 prev = prev_active_insn (ret);
27877 && ((JUMP_P (prev) && any_condjump_p (prev))
27880 /* Empty functions get a branch mispredict even when the jump destination
27881 is not visible to us. */
27882 if (!prev && !optimize_function_for_size_p (cfun))
27887 emit_jump_insn_before (gen_return_internal_long (), ret);
27893 /* Count the minimum number of instructions in BB. Return 4 if the
27894 number of instructions >= 4. */
27897 ix86_count_insn_bb (basic_block bb)
27900 int insn_count = 0;
27902 /* Count number of instructions in this block. Return 4 if the number
27903 of instructions >= 4. */
27904 FOR_BB_INSNS (bb, insn)
27906 /* These only happen in exit blocks. */
27908 && GET_CODE (PATTERN (insn)) == RETURN)
27911 if (NONDEBUG_INSN_P (insn)
27912 && GET_CODE (PATTERN (insn)) != USE
27913 && GET_CODE (PATTERN (insn)) != CLOBBER)
27916 if (insn_count >= 4)
27925 /* Count the minimum number of instructions in the code path through BB.
27926 Return 4 if the number of instructions >= 4. */
27929 ix86_count_insn (basic_block bb)
27933 int min_prev_count;
27935 /* Only bother counting instructions along paths with no
27936 more than 2 basic blocks between entry and exit. Given
27937 that BB has an edge to exit, determine if a predecessor
27938 of BB has an edge from entry. If so, compute the number
27939 of instructions in the predecessor block. If there
27940 happen to be multiple such blocks, compute the minimum. */
27941 min_prev_count = 4;
27942 FOR_EACH_EDGE (e, ei, bb->preds)
27945 edge_iterator prev_ei;
27947 if (e->src == ENTRY_BLOCK_PTR)
27949 min_prev_count = 0;
27952 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
27954 if (prev_e->src == ENTRY_BLOCK_PTR)
27956 int count = ix86_count_insn_bb (e->src);
27957 if (count < min_prev_count)
27958 min_prev_count = count;
27964 if (min_prev_count < 4)
27965 min_prev_count += ix86_count_insn_bb (bb);
27967 return min_prev_count;
27970 /* Pad short functions to 4 instructions. */
27973 ix86_pad_short_function (void)
27978 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27980 rtx ret = BB_END (e->src);
27981 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
27983 int insn_count = ix86_count_insn (e->src);
27985 /* Pad short function. */
27986 if (insn_count < 4)
27990 /* Find epilogue. */
27993 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
27994 insn = PREV_INSN (insn);
27999 /* Two NOPs are counted as one instruction. */
28000 insn_count = 2 * (4 - insn_count);
28001 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
28007 /* Implement machine specific optimizations. We implement padding of returns
28008 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
28012 if (optimize && optimize_function_for_speed_p (cfun))
28014 if (TARGET_PAD_SHORT_FUNCTION)
28015 ix86_pad_short_function ();
28016 else if (TARGET_PAD_RETURNS)
28017 ix86_pad_returns ();
28018 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28019 if (TARGET_FOUR_JUMP_LIMIT)
28020 ix86_avoid_jump_mispredicts ();
28025 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
28028 x86_extended_QIreg_mentioned_p (rtx insn)
28031 extract_insn_cached (insn);
28032 for (i = 0; i < recog_data.n_operands; i++)
28033 if (REG_P (recog_data.operand[i])
28034 && REGNO (recog_data.operand[i]) > BX_REG)
28039 /* Return nonzero when P points to register encoded via REX prefix.
28040 Called via for_each_rtx. */
28042 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28044 unsigned int regno;
28047 regno = REGNO (*p);
28048 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28051 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
28054 x86_extended_reg_mentioned_p (rtx insn)
28056 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28057 extended_reg_mentioned_1, NULL);
28060 /* If profitable, negate (without causing overflow) integer constant
28061 of mode MODE at location LOC. Return true in this case. */
28063 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28067 if (!CONST_INT_P (*loc))
28073 /* DImode x86_64 constants must fit in 32 bits. */
28074 gcc_assert (x86_64_immediate_operand (*loc, mode));
28085 gcc_unreachable ();
28088 /* Avoid overflows. */
28089 if (mode_signbit_p (mode, *loc))
28092 val = INTVAL (*loc);
28094 /* Make things pretty and emit `subl $4,%eax' rather than `addl $-4,%eax'.
28095 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
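/* Examples: "add $-4" becomes "sub $4" (purely cosmetic), and
   "add $128" becomes "sub $-128" (imm8 instead of imm32), while
   "add $-128" is kept as-is, since "sub $128" would need the wider
   immediate encoding.  */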
28096 if ((val < 0 && val != -128)
28099 *loc = GEN_INT (-val);
28106 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28107 optabs would emit if we didn't have TFmode patterns. */
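/* Scalar sketch of the emitted sequence for the DImode case (an
   illustration of the algorithm, not the generated RTL):

     double u64_to_fp (unsigned long long in)
     {
       if ((long long) in >= 0)
         return (double) (long long) in;              // fits signed range
       unsigned long long i = (in >> 1) | (in & 1);   // halve, keep sticky bit
       double f = (double) (long long) i;
       return f + f;                                  // scale back up
     }
*/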
28110 x86_emit_floatuns (rtx operands[2])
28112 rtx neglab, donelab, i0, i1, f0, in, out;
28113 enum machine_mode mode, inmode;
28115 inmode = GET_MODE (operands[1]);
28116 gcc_assert (inmode == SImode || inmode == DImode);
28119 in = force_reg (inmode, operands[1]);
28120 mode = GET_MODE (out);
28121 neglab = gen_label_rtx ();
28122 donelab = gen_label_rtx ();
28123 f0 = gen_reg_rtx (mode);
28125 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28127 expand_float (out, in, 0);
28129 emit_jump_insn (gen_jump (donelab));
28132 emit_label (neglab);
28134 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28136 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28138 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28140 expand_float (f0, i0, 0);
28142 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28144 emit_label (donelab);
28147 /* AVX does not support 32-byte integer vector operations,
28148 thus the longest vector we are faced with is V16QImode. */
28149 #define MAX_VECT_LEN 16
28151 struct expand_vec_perm_d
28153 rtx target, op0, op1;
28154 unsigned char perm[MAX_VECT_LEN];
28155 enum machine_mode vmode;
28156 unsigned char nelt;
28160 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28161 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28163 /* Get a vector mode of the same size as the original but with elements
28164 twice as wide. This is only guaranteed to apply to integral vectors. */
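/* For example: V16QImode -> V8HImode and V8HImode -> V4SImode, each
   the same size with half as many, twice-as-wide elements.  */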
28166 static inline enum machine_mode
28167 get_mode_wider_vector (enum machine_mode o)
28169 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28170 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28171 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28172 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28176 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28177 with all elements equal to VAR. Return true if successful. */
28180 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28181 rtx target, rtx val)
28204 /* First attempt to recognize VAL as-is. */
28205 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28206 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28207 if (recog_memoized (insn) < 0)
28210 /* If that fails, force VAL into a register. */
28213 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28214 seq = get_insns ();
28217 emit_insn_before (seq, insn);
28219 ok = recog_memoized (insn) >= 0;
28228 if (TARGET_SSE || TARGET_3DNOW_A)
28232 val = gen_lowpart (SImode, val);
28233 x = gen_rtx_TRUNCATE (HImode, val);
28234 x = gen_rtx_VEC_DUPLICATE (mode, x);
28235 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28248 struct expand_vec_perm_d dperm;
28252 memset (&dperm, 0, sizeof (dperm));
28253 dperm.target = target;
28254 dperm.vmode = mode;
28255 dperm.nelt = GET_MODE_NUNITS (mode);
28256 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28258 /* Extend to SImode using a paradoxical SUBREG. */
28259 tmp1 = gen_reg_rtx (SImode);
28260 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28262 /* Insert the SImode value as low element of a V4SImode vector. */
28263 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28264 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28266 ok = (expand_vec_perm_1 (&dperm)
28267 || expand_vec_perm_broadcast_1 (&dperm));
28279 /* Replicate the value once into the next wider mode and recurse. */
28281 enum machine_mode smode, wsmode, wvmode;
28284 smode = GET_MODE_INNER (mode);
28285 wvmode = get_mode_wider_vector (mode);
28286 wsmode = GET_MODE_INNER (wvmode);
28288 val = convert_modes (wsmode, smode, val, true);
28289 x = expand_simple_binop (wsmode, ASHIFT, val,
28290 GEN_INT (GET_MODE_BITSIZE (smode)),
28291 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28292 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28294 x = gen_lowpart (wvmode, target);
28295 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28303 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28304 rtx x = gen_reg_rtx (hvmode);
28306 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28309 x = gen_rtx_VEC_CONCAT (mode, x, x);
28310 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28319 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28320 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
28324 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28325 rtx target, rtx var, int one_var)
28327 enum machine_mode vsimode;
28330 bool use_vector_set = false;
28335 /* For SSE4.1, we normally use vector set. But if the second
28336 element is zero and inter-unit moves are OK, we use movq instead. */
28338 use_vector_set = (TARGET_64BIT
28340 && !(TARGET_INTER_UNIT_MOVES
28346 use_vector_set = TARGET_SSE4_1;
28349 use_vector_set = TARGET_SSE2;
28352 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28359 use_vector_set = TARGET_AVX;
28362 /* Use ix86_expand_vector_set in 64bit mode only. */
28363 use_vector_set = TARGET_AVX && TARGET_64BIT;
28369 if (use_vector_set)
28371 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28372 var = force_reg (GET_MODE_INNER (mode), var);
28373 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28389 var = force_reg (GET_MODE_INNER (mode), var);
28390 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28391 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28396 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28397 new_target = gen_reg_rtx (mode);
28399 new_target = target;
28400 var = force_reg (GET_MODE_INNER (mode), var);
28401 x = gen_rtx_VEC_DUPLICATE (mode, var);
28402 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28403 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28406 /* We need to shuffle the value to the correct position, so
28407 create a new pseudo to store the intermediate result. */
28409 /* With SSE2, we can use the integer shuffle insns. */
28410 if (mode != V4SFmode && TARGET_SSE2)
28412 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28414 GEN_INT (one_var == 1 ? 0 : 1),
28415 GEN_INT (one_var == 2 ? 0 : 1),
28416 GEN_INT (one_var == 3 ? 0 : 1)));
28417 if (target != new_target)
28418 emit_move_insn (target, new_target);
28422 /* Otherwise convert the intermediate result to V4SFmode and
28423 use the SSE1 shuffle instructions. */
28424 if (mode != V4SFmode)
28426 tmp = gen_reg_rtx (V4SFmode);
28427 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28432 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28434 GEN_INT (one_var == 1 ? 0 : 1),
28435 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28436 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28438 if (mode != V4SFmode)
28439 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28440 else if (tmp != target)
28441 emit_move_insn (target, tmp);
28443 else if (target != new_target)
28444 emit_move_insn (target, new_target);
28449 vsimode = V4SImode;
28455 vsimode = V2SImode;
28461 /* Zero extend the variable element to SImode and recurse. */
28462 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28464 x = gen_reg_rtx (vsimode);
28465 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28467 gcc_unreachable ();
28469 emit_move_insn (target, gen_lowpart (mode, x));
28477 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28478 consisting of the values in VALS. It is known that all elements
28479 except ONE_VAR are constants. Return true if successful. */
28482 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28483 rtx target, rtx vals, int one_var)
28485 rtx var = XVECEXP (vals, 0, one_var);
28486 enum machine_mode wmode;
28489 const_vec = copy_rtx (vals);
28490 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28491 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28499 /* For the two element vectors, it's just as easy to use
28500 the general case. */
28504 /* Use ix86_expand_vector_set in 64bit mode only. */
28527 /* There's no way to set one QImode entry easily. Combine
28528 the variable value with its adjacent constant value, and
28529 promote to an HImode set. */
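/* Example: to store VAR into byte 5 of a V16QImode vector, byte 4's
   constant is folded into the HImode value (VAR << 8) | BYTE4, and a
   V8HImode element set at index 5 >> 1 == 2 rewrites both bytes.  */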
28530 x = XVECEXP (vals, 0, one_var ^ 1);
28533 var = convert_modes (HImode, QImode, var, true);
28534 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28535 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28536 x = GEN_INT (INTVAL (x) & 0xff);
28540 var = convert_modes (HImode, QImode, var, true);
28541 x = gen_int_mode (INTVAL (x) << 8, HImode);
28543 if (x != const0_rtx)
28544 var = expand_simple_binop (HImode, IOR, var, x, var,
28545 1, OPTAB_LIB_WIDEN);
28547 x = gen_reg_rtx (wmode);
28548 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28549 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28551 emit_move_insn (target, gen_lowpart (mode, x));
28558 emit_move_insn (target, const_vec);
28559 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28563 /* A subroutine of ix86_expand_vector_init_general. Use vector
28564 concatenate to handle the most general case: all values variable,
28565 and none identical. */
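/* Sketch of the recursion: a V4SImode { a, b, c, d } is built from two
   half-width pieces, concat (a, b) and concat (c, d), which are then
   concatenated into the full vector; 8- and 16-element modes add
   further levels of the same halving.  */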
28568 ix86_expand_vector_init_concat (enum machine_mode mode,
28569 rtx target, rtx *ops, int n)
28571 enum machine_mode cmode, hmode = VOIDmode;
28572 rtx first[8], second[4];
28612 gcc_unreachable ();
28615 if (!register_operand (ops[1], cmode))
28616 ops[1] = force_reg (cmode, ops[1]);
28617 if (!register_operand (ops[0], cmode))
28618 ops[0] = force_reg (cmode, ops[0]);
28619 emit_insn (gen_rtx_SET (VOIDmode, target,
28620 gen_rtx_VEC_CONCAT (mode, ops[0],
28640 gcc_unreachable ();
28656 gcc_unreachable ();
28661 /* FIXME: We process inputs backward to help RA. PR 36222. */
28664 for (; i > 0; i -= 2, j--)
28666 first[j] = gen_reg_rtx (cmode);
28667 v = gen_rtvec (2, ops[i - 1], ops[i]);
28668 ix86_expand_vector_init (false, first[j],
28669 gen_rtx_PARALLEL (cmode, v));
28675 gcc_assert (hmode != VOIDmode);
28676 for (i = j = 0; i < n; i += 2, j++)
28678 second[j] = gen_reg_rtx (hmode);
28679 ix86_expand_vector_init_concat (hmode, second [j],
28683 ix86_expand_vector_init_concat (mode, target, second, n);
28686 ix86_expand_vector_init_concat (mode, target, first, n);
28690 gcc_unreachable ();
28694 /* A subroutine of ix86_expand_vector_init_general. Use vector
28695 interleave to handle the most general case: all values variable,
28696 and none identical. */
28699 ix86_expand_vector_init_interleave (enum machine_mode mode,
28700 rtx target, rtx *ops, int n)
28702 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28705 rtx (*gen_load_even) (rtx, rtx, rtx);
28706 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28707 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28712 gen_load_even = gen_vec_setv8hi;
28713 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28714 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28715 inner_mode = HImode;
28716 first_imode = V4SImode;
28717 second_imode = V2DImode;
28718 third_imode = VOIDmode;
28721 gen_load_even = gen_vec_setv16qi;
28722 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28723 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28724 inner_mode = QImode;
28725 first_imode = V8HImode;
28726 second_imode = V4SImode;
28727 third_imode = V2DImode;
28730 gcc_unreachable ();
28733 for (i = 0; i < n; i++)
28735 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28736 op0 = gen_reg_rtx (SImode);
28737 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28739 /* Insert the SImode value as low element of V4SImode vector. */
28740 op1 = gen_reg_rtx (V4SImode);
28741 op0 = gen_rtx_VEC_MERGE (V4SImode,
28742 gen_rtx_VEC_DUPLICATE (V4SImode,
28744 CONST0_RTX (V4SImode),
28746 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28748 /* Cast the V4SImode vector back to a vector in original mode. */
28749 op0 = gen_reg_rtx (mode);
28750 emit_move_insn (op0, gen_lowpart (mode, op1));
28752 /* Load even elements into the second position. */
28753 emit_insn (gen_load_even (op0,
28754 force_reg (inner_mode,
28758 /* Cast vector to FIRST_IMODE vector. */
28759 ops[i] = gen_reg_rtx (first_imode);
28760 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28763 /* Interleave low FIRST_IMODE vectors. */
28764 for (i = j = 0; i < n; i += 2, j++)
28766 op0 = gen_reg_rtx (first_imode);
28767 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28769 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28770 ops[j] = gen_reg_rtx (second_imode);
28771 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28774 /* Interleave low SECOND_IMODE vectors. */
28775 switch (second_imode)
28778 for (i = j = 0; i < n / 2; i += 2, j++)
28780 op0 = gen_reg_rtx (second_imode);
28781 emit_insn (gen_interleave_second_low (op0, ops[i],
28784 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28786 ops[j] = gen_reg_rtx (third_imode);
28787 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28789 second_imode = V2DImode;
28790 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28794 op0 = gen_reg_rtx (second_imode);
28795 emit_insn (gen_interleave_second_low (op0, ops[0],
28798 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
28800 emit_insn (gen_rtx_SET (VOIDmode, target,
28801 gen_lowpart (mode, op0)));
28805 gcc_unreachable ();
28809 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28810 all values variable, and none identical. */
28813 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28814 rtx target, rtx vals)
28816 rtx ops[32], op0, op1;
28817 enum machine_mode half_mode = VOIDmode;
28824 if (!mmx_ok && !TARGET_SSE)
28836 n = GET_MODE_NUNITS (mode);
28837 for (i = 0; i < n; i++)
28838 ops[i] = XVECEXP (vals, 0, i);
28839 ix86_expand_vector_init_concat (mode, target, ops, n);
28843 half_mode = V16QImode;
28847 half_mode = V8HImode;
28851 n = GET_MODE_NUNITS (mode);
28852 for (i = 0; i < n; i++)
28853 ops[i] = XVECEXP (vals, 0, i);
28854 op0 = gen_reg_rtx (half_mode);
28855 op1 = gen_reg_rtx (half_mode);
28856 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28858 ix86_expand_vector_init_interleave (half_mode, op1,
28859 &ops [n >> 1], n >> 2);
28860 emit_insn (gen_rtx_SET (VOIDmode, target,
28861 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28865 if (!TARGET_SSE4_1)
28873 /* Don't use ix86_expand_vector_init_interleave if we can't
28874 move from GPR to SSE register directly. */
28875 if (!TARGET_INTER_UNIT_MOVES)
28878 n = GET_MODE_NUNITS (mode);
28879 for (i = 0; i < n; i++)
28880 ops[i] = XVECEXP (vals, 0, i);
28881 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28889 gcc_unreachable ();
28893 int i, j, n_elts, n_words, n_elt_per_word;
28894 enum machine_mode inner_mode;
28895 rtx words[4], shift;
28897 inner_mode = GET_MODE_INNER (mode);
28898 n_elts = GET_MODE_NUNITS (mode);
28899 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28900 n_elt_per_word = n_elts / n_words;
28901 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28903 for (i = 0; i < n_words; ++i)
28905 rtx word = NULL_RTX;
28907 for (j = 0; j < n_elt_per_word; ++j)
28909 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28910 elt = convert_modes (word_mode, inner_mode, elt, true);
28916 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28917 word, 1, OPTAB_LIB_WIDEN);
28918 word = expand_simple_binop (word_mode, IOR, word, elt,
28919 word, 1, OPTAB_LIB_WIDEN);
28927 emit_move_insn (target, gen_lowpart (mode, words[0]));
28928 else if (n_words == 2)
28930 rtx tmp = gen_reg_rtx (mode);
28931 emit_clobber (tmp);
28932 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28933 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28934 emit_move_insn (target, tmp);
28936 else if (n_words == 4)
28938 rtx tmp = gen_reg_rtx (V4SImode);
28939 gcc_assert (word_mode == SImode);
28940 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28941 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28942 emit_move_insn (target, gen_lowpart (mode, tmp));
28945 gcc_unreachable ();
28949 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28950 instructions unless MMX_OK is true. */
28953 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28955 enum machine_mode mode = GET_MODE (target);
28956 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28957 int n_elts = GET_MODE_NUNITS (mode);
28958 int n_var = 0, one_var = -1;
28959 bool all_same = true, all_const_zero = true;
28963 for (i = 0; i < n_elts; ++i)
28965 x = XVECEXP (vals, 0, i);
28966 if (!(CONST_INT_P (x)
28967 || GET_CODE (x) == CONST_DOUBLE
28968 || GET_CODE (x) == CONST_FIXED))
28969 n_var++, one_var = i;
28970 else if (x != CONST0_RTX (inner_mode))
28971 all_const_zero = false;
28972 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28976 /* Constants are best loaded from the constant pool. */
28979 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28983 /* If all values are identical, broadcast the value. */
28985 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28986 XVECEXP (vals, 0, 0)))
28989 /* Values where only one field is non-constant are best loaded from
28990 the pool and overwritten via move later. */
28994 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28995 XVECEXP (vals, 0, one_var),
28999 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29003 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29007 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29009 enum machine_mode mode = GET_MODE (target);
29010 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29011 enum machine_mode half_mode;
29012 bool use_vec_merge = false;
29014 static rtx (*gen_extract[6][2]) (rtx, rtx)
29016 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29017 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29018 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29019 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29020 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29021 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29023 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29025 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29026 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29027 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29028 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29029 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29030 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29040 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29041 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29043 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29045 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29046 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29052 use_vec_merge = TARGET_SSE4_1;
29060 /* For the two element vectors, we implement a VEC_CONCAT with
29061 the extraction of the other element. */
29063 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29064 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29067 op0 = val, op1 = tmp;
29069 op0 = tmp, op1 = val;
29071 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29072 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29077 use_vec_merge = TARGET_SSE4_1;
29084 use_vec_merge = true;
29088 /* tmp = target = A B C D */
29089 tmp = copy_to_reg (target);
29090 /* target = A A B B */
29091 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29092 /* target = X A B B */
29093 ix86_expand_vector_set (false, target, val, 0);
29094 /* target = A X C D */
29095 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29096 const1_rtx, const0_rtx,
29097 GEN_INT (2+4), GEN_INT (3+4)));
29101 /* tmp = target = A B C D */
29102 tmp = copy_to_reg (target);
29103 /* tmp = X B C D */
29104 ix86_expand_vector_set (false, tmp, val, 0);
29105 /* target = A B X D */
29106 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29107 const0_rtx, const1_rtx,
29108 GEN_INT (0+4), GEN_INT (3+4)));
29112 /* tmp = target = A B C D */
29113 tmp = copy_to_reg (target);
29114 /* tmp = X B C D */
29115 ix86_expand_vector_set (false, tmp, val, 0);
29116 /* target = A B X D */
29117 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29118 const0_rtx, const1_rtx,
29119 GEN_INT (2+4), GEN_INT (0+4)));
29123 gcc_unreachable ();
29128 use_vec_merge = TARGET_SSE4_1;
29132 /* Element 0 handled by vec_merge below. */
29135 use_vec_merge = true;
29141 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29142 store into element 0, then shuffle them back. */
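/* Example for ELT == 2 on V4SImode: order becomes { 2, 1, 0, 3 }, so
   the first pshufd swaps elements 0 and 2, the store below lands in
   element 0, and the second, identical pshufd swaps them back.  */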
29146 order[0] = GEN_INT (elt);
29147 order[1] = const1_rtx;
29148 order[2] = const2_rtx;
29149 order[3] = GEN_INT (3);
29150 order[elt] = const0_rtx;
29152 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29153 order[1], order[2], order[3]));
29155 ix86_expand_vector_set (false, target, val, 0);
29157 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29158 order[1], order[2], order[3]));
29162 /* For SSE1, we have to reuse the V4SF code. */
29163 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29164 gen_lowpart (SFmode, val), elt);
29169 use_vec_merge = TARGET_SSE2;
29172 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29176 use_vec_merge = TARGET_SSE4_1;
29183 half_mode = V16QImode;
29189 half_mode = V8HImode;
29195 half_mode = V4SImode;
29201 half_mode = V2DImode;
29207 half_mode = V4SFmode;
29213 half_mode = V2DFmode;
29219 /* Compute offset. */
29223 gcc_assert (i <= 1);
29225 /* Extract the half. */
29226 tmp = gen_reg_rtx (half_mode);
29227 emit_insn (gen_extract[j][i] (tmp, target));
29229 /* Put val in tmp at elt. */
29230 ix86_expand_vector_set (false, tmp, val, elt);
29233 emit_insn (gen_insert[j][i] (target, target, tmp));
29242 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29243 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29244 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29248 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29250 emit_move_insn (mem, target);
29252 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29253 emit_move_insn (tmp, val);
29255 emit_move_insn (target, mem);
29260 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29262 enum machine_mode mode = GET_MODE (vec);
29263 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29264 bool use_vec_extr = false;
29277 use_vec_extr = true;
29281 use_vec_extr = TARGET_SSE4_1;
29293 tmp = gen_reg_rtx (mode);
29294 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29295 GEN_INT (elt), GEN_INT (elt),
29296 GEN_INT (elt+4), GEN_INT (elt+4)));
29300 tmp = gen_reg_rtx (mode);
29301 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29305 gcc_unreachable ();
29308 use_vec_extr = true;
29313 use_vec_extr = TARGET_SSE4_1;
29327 tmp = gen_reg_rtx (mode);
29328 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29329 GEN_INT (elt), GEN_INT (elt),
29330 GEN_INT (elt), GEN_INT (elt)));
29334 tmp = gen_reg_rtx (mode);
29335 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29339 gcc_unreachable ();
29342 use_vec_extr = true;
29347 /* For SSE1, we have to reuse the V4SF code. */
29348 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29349 gen_lowpart (V4SFmode, vec), elt);
29355 use_vec_extr = TARGET_SSE2;
29358 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29362 use_vec_extr = TARGET_SSE4_1;
29366 /* ??? Could extract the appropriate HImode element and shift. */
29373 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29374 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29376 /* Let the rtl optimizers know about the zero extension performed. */
29377 if (inner_mode == QImode || inner_mode == HImode)
29379 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29380 target = gen_lowpart (SImode, target);
29383 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29387 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29389 emit_move_insn (mem, vec);
29391 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29392 emit_move_insn (target, tmp);
29396 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29397 pattern to reduce; DEST is the destination; IN is the input vector. */
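/* Rough intrinsics equivalent of the sequence below, where FN stands
   for the binary operation (e.g. _mm_add_ps); an illustration only:

     __m128 t1 = _mm_movehl_ps (in, in);         // { C, D, C, D }
     __m128 t2 = FN (t1, in);                    // { FN(C,A), FN(D,B), .. }
     __m128 t3 = _mm_shuffle_ps (t2, t2, 0x55);  // broadcast element 1
     dest      = FN (t2, t3);                    // full reduction in elt 0
*/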
29400 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29402 rtx tmp1, tmp2, tmp3;
29404 tmp1 = gen_reg_rtx (V4SFmode);
29405 tmp2 = gen_reg_rtx (V4SFmode);
29406 tmp3 = gen_reg_rtx (V4SFmode);
29408 emit_insn (gen_sse_movhlps (tmp1, in, in));
29409 emit_insn (fn (tmp2, tmp1, in));
29411 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29412 const1_rtx, const1_rtx,
29413 GEN_INT (1+4), GEN_INT (1+4)));
29414 emit_insn (fn (dest, tmp2, tmp3));
29417 /* Target hook for scalar_mode_supported_p. */
29419 ix86_scalar_mode_supported_p (enum machine_mode mode)
29421 if (DECIMAL_FLOAT_MODE_P (mode))
29422 return default_decimal_float_supported_p ();
29423 else if (mode == TFmode)
29426 return default_scalar_mode_supported_p (mode);
29429 /* Implements target hook vector_mode_supported_p. */
29431 ix86_vector_mode_supported_p (enum machine_mode mode)
29433 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29435 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29437 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29439 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29441 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29446 /* Target hook for c_mode_for_suffix. */
29447 static enum machine_mode
29448 ix86_c_mode_for_suffix (char suffix)
29458 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29460 We do this in the new i386 backend to maintain source compatibility
29461 with the old cc0-based compiler. */
29464 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29465 tree inputs ATTRIBUTE_UNUSED,
29468 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29470 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29475 /* Implements target vector targetm.asm.encode_section_info. This
29476 is not used by NetWare. */
29478 static void ATTRIBUTE_UNUSED
29479 ix86_encode_section_info (tree decl, rtx rtl, int first)
29481 default_encode_section_info (decl, rtl, first);
29483 if (TREE_CODE (decl) == VAR_DECL
29484 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29485 && ix86_in_large_data_p (decl))
29486 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29489 /* Worker function for REVERSE_CONDITION. */
29492 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29494 return (mode != CCFPmode && mode != CCFPUmode
29495 ? reverse_condition (code)
29496 : reverse_condition_maybe_unordered (code));
29499 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
29503 output_387_reg_move (rtx insn, rtx *operands)
29505 if (REG_P (operands[0]))
29507 if (REG_P (operands[1])
29508 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29510 if (REGNO (operands[0]) == FIRST_STACK_REG)
29511 return output_387_ffreep (operands, 0);
29512 return "fstp\t%y0";
29514 if (STACK_TOP_P (operands[0]))
29515 return "fld%Z1\t%y1";
29518 else if (MEM_P (operands[0]))
29520 gcc_assert (REG_P (operands[1]));
29521 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29522 return "fstp%Z0\t%y0";
29525 /* There is no non-popping store to memory for XFmode.
29526 So if we need one, follow the store with a load. */
29527 if (GET_MODE (operands[0]) == XFmode)
29528 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29530 return "fst%Z0\t%y0";
29537 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29538 FP status register is set. */
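/* The C2 condition flag is bit 10 of the x87 status word; "fnstsw %ax"
   places status-word bits 8-15 in %ah, so C2 lands in bit 2 of %ah,
   which is what the 0x04 test below checks.  With a usable SAHF the
   flags are instead copied into EFLAGS and an unordered branch is used. */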
29541 ix86_emit_fp_unordered_jump (rtx label)
29543 rtx reg = gen_reg_rtx (HImode);
29546 emit_insn (gen_x86_fnstsw_1 (reg));
29548 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29550 emit_insn (gen_x86_sahf_1 (reg));
29552 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29553 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29557 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29559 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29560 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29563 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29564 gen_rtx_LABEL_REF (VOIDmode, label),
29566 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29568 emit_jump_insn (temp);
29569 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29572 /* Output code to perform a log1p XFmode calculation. */
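/* C-like sketch of the sequence below (the threshold is 1 - sqrt(2)/2,
   the limit of fyl2xp1's argument range):

     if (fabs (op1) >= 0.29289321881345247...)
       op0 = log (1.0 + op1);   // fldln2; addxf; fyl2x
     else
       op0 = log1p (op1);       // fldln2; fyl2xp1, accurate near 0
*/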
29574 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29576 rtx label1 = gen_label_rtx ();
29577 rtx label2 = gen_label_rtx ();
29579 rtx tmp = gen_reg_rtx (XFmode);
29580 rtx tmp2 = gen_reg_rtx (XFmode);
29583 emit_insn (gen_absxf2 (tmp, op1));
29584 test = gen_rtx_GE (VOIDmode, tmp,
29585 CONST_DOUBLE_FROM_REAL_VALUE (
29586 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29588 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29590 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29591 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29592 emit_jump (label2);
29594 emit_label (label1);
29595 emit_move_insn (tmp, CONST1_RTX (XFmode));
29596 emit_insn (gen_addxf3 (tmp, op1, tmp));
29597 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29598 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29600 emit_label (label2);
29603 /* Output code to perform a Newton-Raphson approximation of a single precision
29604 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
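/* Scalar sketch of the expansion below; "rcp_estimate" is a stand-in
   for the ~12-bit rcpss/rcpps estimate instruction:

     float swdiv (float a, float b)
     {
       float x0 = rcp_estimate (b);
       float e0 = a * x0;
       float e1 = b * x0;
       float x1 = 2.0f - e1;
       return e0 * x1;           // == a * x0 * (2 - b * x0)
     }
*/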
29606 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29608 rtx x0, x1, e0, e1, two;
29610 x0 = gen_reg_rtx (mode);
29611 e0 = gen_reg_rtx (mode);
29612 e1 = gen_reg_rtx (mode);
29613 x1 = gen_reg_rtx (mode);
29615 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29617 if (VECTOR_MODE_P (mode))
29618 two = ix86_build_const_vector (SFmode, true, two);
29620 two = force_reg (mode, two);
29622 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29624 /* x0 = rcp(b) estimate */
29625 emit_insn (gen_rtx_SET (VOIDmode, x0,
29626 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29629 emit_insn (gen_rtx_SET (VOIDmode, e0,
29630 gen_rtx_MULT (mode, x0, a)));
29632 emit_insn (gen_rtx_SET (VOIDmode, e1,
29633 gen_rtx_MULT (mode, x0, b)));
29635 emit_insn (gen_rtx_SET (VOIDmode, x1,
29636 gen_rtx_MINUS (mode, two, e1)));
29637 /* res = e0 * x1 */
29638 emit_insn (gen_rtx_SET (VOIDmode, res,
29639 gen_rtx_MULT (mode, e0, x1)));
29642 /* Output code to perform a Newton-Raphson approximation of a
29643 single precision floating point [reciprocal] square root. */
29645 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29648 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29651 x0 = gen_reg_rtx (mode);
29652 e0 = gen_reg_rtx (mode);
29653 e1 = gen_reg_rtx (mode);
29654 e2 = gen_reg_rtx (mode);
29655 e3 = gen_reg_rtx (mode);
29657 real_from_integer (&r, VOIDmode, -3, -1, 0);
29658 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29660 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29661 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29663 if (VECTOR_MODE_P (mode))
29665 mthree = ix86_build_const_vector (SFmode, true, mthree);
29666 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29669 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29670 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
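/* Scalar sketch of the steps below ("rsqrt_estimate" is a stand-in for
   the ~12-bit rsqrtss/rsqrtps estimate); the sqrt case differs only in
   using e0 rather than x0 when forming e3:

     x0 = rsqrt_estimate (a);
     e0 = x0 * a;
     e1 = e0 * x0;          // a * x0 * x0
     e2 = e1 - 3.0;
     e3 = x0 * -0.5;        // (e0 * -0.5 for sqrt)
     res = e2 * e3;         // -0.5 * x0 * (a*x0*x0 - 3)
*/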
29672 /* x0 = rsqrt(a) estimate */
29673 emit_insn (gen_rtx_SET (VOIDmode, x0,
29674 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29677 /* If a == 0.0, filter out the infinite rsqrt(0.0) result to prevent a NaN in sqrt(0.0). */
29682 zero = gen_reg_rtx (mode);
29683 mask = gen_reg_rtx (mode);
29685 zero = force_reg (mode, CONST0_RTX(mode));
29686 emit_insn (gen_rtx_SET (VOIDmode, mask,
29687 gen_rtx_NE (mode, zero, a)));
29689 emit_insn (gen_rtx_SET (VOIDmode, x0,
29690 gen_rtx_AND (mode, x0, mask)));
29694 emit_insn (gen_rtx_SET (VOIDmode, e0,
29695 gen_rtx_MULT (mode, x0, a)));
29697 emit_insn (gen_rtx_SET (VOIDmode, e1,
29698 gen_rtx_MULT (mode, e0, x0)));
29701 mthree = force_reg (mode, mthree);
29702 emit_insn (gen_rtx_SET (VOIDmode, e2,
29703 gen_rtx_PLUS (mode, e1, mthree)));
29705 mhalf = force_reg (mode, mhalf);
29707 /* e3 = -.5 * x0 */
29708 emit_insn (gen_rtx_SET (VOIDmode, e3,
29709 gen_rtx_MULT (mode, x0, mhalf)));
29711 /* e3 = -.5 * e0 */
29712 emit_insn (gen_rtx_SET (VOIDmode, e3,
29713 gen_rtx_MULT (mode, e0, mhalf)));
29714 /* ret = e2 * e3 */
29715 emit_insn (gen_rtx_SET (VOIDmode, res,
29716 gen_rtx_MULT (mode, e2, e3)));
29719 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29721 static void ATTRIBUTE_UNUSED
29722 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29725 /* With Binutils 2.15, the "@unwind" marker must be specified on
29726 every occurrence of the ".eh_frame" section, not just the first one. */
29729 && strcmp (name, ".eh_frame") == 0)
29731 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29732 flags & SECTION_WRITE ? "aw" : "a");
29735 default_elf_asm_named_section (name, flags, decl);
29738 /* Return the mangling of TYPE if it is an extended fundamental type. */
29740 static const char *
29741 ix86_mangle_type (const_tree type)
29743 type = TYPE_MAIN_VARIANT (type);
29745 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29746 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29749 switch (TYPE_MODE (type))
29752 /* __float128 is "g". */
29755 /* "long double" or __float80 is "e". */
29762 /* For 32-bit code we can save PIC register setup by using
29763 __stack_chk_fail_local hidden function instead of calling
29764 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29765 register, so it is better to call __stack_chk_fail directly. */
29768 ix86_stack_protect_fail (void)
29770 return TARGET_64BIT
29771 ? default_external_stack_protect_fail ()
29772 : default_hidden_stack_protect_fail ();
29775 /* Select a format to encode pointers in exception handling data. CODE
29776 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29777 true if the symbol may be affected by dynamic relocations.
29779 ??? All x86 object file formats are capable of representing this.
29780 After all, the relocation needed is the same as for the call insn.
29781 Whether or not a particular assembler allows us to enter such, I
29782 guess we'll have to see. */
29784 asm_preferred_eh_data_format (int code, int global)
29788 int type = DW_EH_PE_sdata8;
29790 || ix86_cmodel == CM_SMALL_PIC
29791 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29792 type = DW_EH_PE_sdata4;
29793 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29795 if (ix86_cmodel == CM_SMALL
29796 || (ix86_cmodel == CM_MEDIUM && code))
29797 return DW_EH_PE_udata4;
29798 return DW_EH_PE_absptr;
29801 /* Expand copysign from SIGN to the positive value ABS_VALUE
29802 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
29805 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29807 enum machine_mode mode = GET_MODE (sign);
29808 rtx sgn = gen_reg_rtx (mode);
29809 if (mask == NULL_RTX)
29811 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29812 if (!VECTOR_MODE_P (mode))
29814 /* We need to generate a scalar mode mask in this case. */
29815 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29816 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29817 mask = gen_reg_rtx (mode);
29818 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29822 mask = gen_rtx_NOT (mode, mask);
29823 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29824 gen_rtx_AND (mode, mask, sign)));
29825 emit_insn (gen_rtx_SET (VOIDmode, result,
29826 gen_rtx_IOR (mode, abs_value, sgn)));
29829 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29830 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
29833 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29835 enum machine_mode mode = GET_MODE (op0);
29838 xa = gen_reg_rtx (mode);
29839 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29840 if (!VECTOR_MODE_P (mode))
29842 /* We need to generate a scalar mode mask in this case. */
29843 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29844 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29845 mask = gen_reg_rtx (mode);
29846 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29848 emit_insn (gen_rtx_SET (VOIDmode, xa,
29849 gen_rtx_AND (mode, op0, mask)));
29857 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29858 swapping the operands if SWAP_OPERANDS is true. The expanded
29859 code is a forward jump to a newly created label in case the
29860 comparison is true. The generated label rtx is returned. */
29862 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29863 bool swap_operands)
29874 label = gen_label_rtx ();
29875 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29876 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29877 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29878 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29879 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29880 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29881 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29882 JUMP_LABEL (tmp) = label;
29887 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29888 using comparison code CODE. Operands are swapped for the comparison if
29889 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29891 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29892 bool swap_operands)
29894 enum machine_mode mode = GET_MODE (op0);
29895 rtx mask = gen_reg_rtx (mode);
29904 if (mode == DFmode)
29905 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29906 gen_rtx_fmt_ee (code, mode, op0, op1)));
29908 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29909 gen_rtx_fmt_ee (code, mode, op0, op1)));
29914 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29915 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
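/* For DFmode this is 2**52 == 4503599627370496.0; for SFmode it is
   2**23 == 8388608.0.  Adding and then subtracting this constant
   forces values of smaller magnitude to be rounded to an integer,
   the trick the rint/floor/ceil expanders below rely on.  */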
29917 ix86_gen_TWO52 (enum machine_mode mode)
29919 REAL_VALUE_TYPE TWO52r;
29922 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29923 TWO52 = const_double_from_real_value (TWO52r, mode);
29924 TWO52 = force_reg (mode, TWO52);
29929 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
29932 ix86_expand_lround (rtx op0, rtx op1)
29934 /* C code for the stuff we're doing below:
29935 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29938 enum machine_mode mode = GET_MODE (op1);
29939 const struct real_format *fmt;
29940 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29943 /* load nextafter (0.5, 0.0) */
29944 fmt = REAL_MODE_FORMAT (mode);
29945 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29946 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
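/* pred_half is the largest representable value below 0.5 in MODE;
   using it instead of 0.5 itself keeps inputs just under 0.5, whose
   sum with an exact 0.5 would round up to 1.0, from being rounded
   away from zero.  */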
29948 /* adj = copysign (0.5, op1) */
29949 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29950 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29952 /* adj = op1 + adj */
29953 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29955 /* op0 = (imode)adj */
29956 expand_fix (op0, adj, 0);
29959 /* Expand SSE2 sequence for computing lround from OPERAND1 storing into OPERAND0. */
29962 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29964 /* C code for the stuff we're doing below (for do_floor):
29966 xi -= (double)xi > op1 ? 1 : 0;
29969 enum machine_mode fmode = GET_MODE (op1);
29970 enum machine_mode imode = GET_MODE (op0);
29971 rtx ireg, freg, label, tmp;
29973 /* reg = (long)op1 */
29974 ireg = gen_reg_rtx (imode);
29975 expand_fix (ireg, op1, 0);
29977 /* freg = (double)reg */
29978 freg = gen_reg_rtx (fmode);
29979 expand_float (freg, ireg, 0);
29981 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29982 label = ix86_expand_sse_compare_and_jump (UNLE,
29983 freg, op1, !do_floor);
29984 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29985 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29986 emit_move_insn (ireg, tmp);
29988 emit_label (label);
29989 LABEL_NUSES (label) = 1;
29991 emit_move_insn (op0, ireg);
29994 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29995 result in OPERAND0. */
29997 ix86_expand_rint (rtx operand0, rtx operand1)
29999 /* C code for the stuff we're doing below:
30000 xa = fabs (operand1);
30001 if (!isless (xa, 2**52))
30003 xa = xa + 2**52 - 2**52;
30004 return copysign (xa, operand1);
30006 enum machine_mode mode = GET_MODE (operand0);
30007 rtx res, xa, label, TWO52, mask;
30009 res = gen_reg_rtx (mode);
30010 emit_move_insn (res, operand1);
30012 /* xa = abs (operand1) */
30013 xa = ix86_expand_sse_fabs (res, &mask);
30015 /* if (!isless (xa, TWO52)) goto label; */
30016 TWO52 = ix86_gen_TWO52 (mode);
30017 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30019 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30020 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30022 ix86_sse_copysign_to_positive (res, xa, res, mask);
30024 emit_label (label);
30025 LABEL_NUSES (label) = 1;
30027 emit_move_insn (operand0, res);
30030 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
30033 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30035 /* C code for the stuff we expand below.
30036 double xa = fabs (x), x2;
30037 if (!isless (xa, TWO52))
30039 xa = xa + TWO52 - TWO52;
30040 x2 = copysign (xa, x);
30049 enum machine_mode mode = GET_MODE (operand0);
30050 rtx xa, TWO52, tmp, label, one, res, mask;
30052 TWO52 = ix86_gen_TWO52 (mode);
30054 /* Temporary for holding the result, initialized to the input
30055 operand to ease control flow. */
30056 res = gen_reg_rtx (mode);
30057 emit_move_insn (res, operand1);
30059 /* xa = abs (operand1) */
30060 xa = ix86_expand_sse_fabs (res, &mask);
30062 /* if (!isless (xa, TWO52)) goto label; */
30063 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30065 /* xa = xa + TWO52 - TWO52; */
30066 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30067 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30069 /* xa = copysign (xa, operand1) */
30070 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30072 /* generate 1.0 or -1.0 */
30073 one = force_reg (mode,
30074 const_double_from_real_value (do_floor
30075 ? dconst1 : dconstm1, mode));
30077 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30078 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30079 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30080 gen_rtx_AND (mode, one, tmp)));
30081 /* We always need to subtract here to preserve signed zero. */
30082 tmp = expand_simple_binop (mode, MINUS,
30083 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30084 emit_move_insn (res, tmp);
30086 emit_label (label);
30087 LABEL_NUSES (label) = 1;
30089 emit_move_insn (operand0, res);
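/* A minimal scalar sketch of the floor case above (illustrative only):

     #include <math.h>

     double
     floor_df32 (double x)
     {
       double xa = fabs (x);
       if (!(xa < 0x1p52))          /* !isless (xa, TWO52) */
         return x;
       xa = xa + 0x1p52 - 0x1p52;   /* Round to nearest integer.  */
       xa = copysign (xa, x);
       return xa - (xa > x ? 1.0 : 0.0);
     }

   Note the final subtraction: for x == -0.0 it computes -0.0 - 0.0,
   which stays -0.0, whereas an addition would lose the sign.  */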
30092 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30095 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30097 /* C code for the stuff we expand below.
30098 double xa = fabs (x), x2;
30099 if (!isless (xa, TWO52))
        return x;
30101 x2 = (double)(long)x;
      Compensate: x2 -= (x2 > x) for floor, x2 += (x2 < x) for ceil;
30108 if (HONOR_SIGNED_ZEROS (mode))
30109 return copysign (x2, x);
      return x2;
30112 enum machine_mode mode = GET_MODE (operand0);
30113 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30115 TWO52 = ix86_gen_TWO52 (mode);
30117 /* Temporary for holding the result, initialized to the input
30118 operand to ease control flow. */
30119 res = gen_reg_rtx (mode);
30120 emit_move_insn (res, operand1);
30122 /* xa = abs (operand1) */
30123 xa = ix86_expand_sse_fabs (res, &mask);
30125 /* if (!isless (xa, TWO52)) goto label; */
30126 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30128 /* xa = (double)(long)x */
30129 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30130 expand_fix (xi, res, 0);
30131 expand_float (xa, xi, 0);
30134 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30136 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30137 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30138 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30139 gen_rtx_AND (mode, one, tmp)));
30140 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30141 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30142 emit_move_insn (res, tmp);
30144 if (HONOR_SIGNED_ZEROS (mode))
30145 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30147 emit_label (label);
30148 LABEL_NUSES (label) = 1;
30150 emit_move_insn (operand0, res);
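/* A minimal scalar sketch of the floor case above (illustrative only):

     double
     floor_sse (double x)
     {
       double xa = fabs (x);
       if (!(xa < 0x1p52))
         return x;
       double x2 = (double) (long) x;   /* Truncate toward zero.  */
       if (x2 > x)                      /* Negative non-integer.  */
         x2 -= 1.0;
       return x2;   /* Plus a copysign when signed zeros matter.  */
     }
*/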
30153 /* Expand SSE sequence for computing round from OPERAND1 storing
30154 into OPERAND0. Sequence that works without relying on DImode truncation
30155 via cvttsd2siq, which is only available on 64-bit targets. */
30157 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30159 /* C code for the stuff we expand below.
30160 double xa = fabs (x), xa2, x2;
30161 if (!isless (xa, TWO52))
        return x;
30163 Using the absolute value and copying back sign makes
30164 -0.0 -> -0.0 correct.
30165 xa2 = xa + TWO52 - TWO52;
      dxa = xa2 - xa;
      if (dxa <= -0.5)
        xa2 += 1;
30170 else if (dxa > 0.5)
        xa2 -= 1;
30172 x2 = copysign (xa2, x);
      return x2;
30175 enum machine_mode mode = GET_MODE (operand0);
30176 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30178 TWO52 = ix86_gen_TWO52 (mode);
30180 /* Temporary for holding the result, initialized to the input
30181 operand to ease control flow. */
30182 res = gen_reg_rtx (mode);
30183 emit_move_insn (res, operand1);
30185 /* xa = abs (operand1) */
30186 xa = ix86_expand_sse_fabs (res, &mask);
30188 /* if (!isless (xa, TWO52)) goto label; */
30189 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30191 /* xa2 = xa + TWO52 - TWO52; */
30192 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30193 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30195 /* dxa = xa2 - xa; */
30196 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30198 /* generate 0.5, 1.0 and -0.5 */
30199 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30200 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30201 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30205 tmp = gen_reg_rtx (mode);
30206 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30207 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30208 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30209 gen_rtx_AND (mode, one, tmp)));
30210 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30211 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30212 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30213 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30214 gen_rtx_AND (mode, one, tmp)));
30215 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30217 /* res = copysign (xa2, operand1) */
30218 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30220 emit_label (label);
30221 LABEL_NUSES (label) = 1;
30223 emit_move_insn (operand0, res);
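/* A minimal scalar sketch of the sequence above (illustrative only):

     #include <math.h>

     double
     round_df32 (double x)
     {
       double xa = fabs (x);
       if (!(xa < 0x1p52))
         return x;
       double xa2 = xa + 0x1p52 - 0x1p52;   /* Round to nearest even.  */
       double dxa = xa2 - xa;               /* In [-0.5, 0.5].  */
       if (dxa > 0.5)
         xa2 -= 1.0;
       if (dxa <= -0.5)
         xa2 += 1.0;   /* A tie rounded down to even: round it up.  */
       return copysign (xa2, x);
     }
*/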
30226 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30229 ix86_expand_trunc (rtx operand0, rtx operand1)
30231 /* C code for SSE variant we expand below.
30232 double xa = fabs (x), x2;
30233 if (!isless (xa, TWO52))
        return x;
30235 x2 = (double)(long)x;
30236 if (HONOR_SIGNED_ZEROS (mode))
30237 return copysign (x2, x);
      return x2;
30240 enum machine_mode mode = GET_MODE (operand0);
30241 rtx xa, xi, TWO52, label, res, mask;
30243 TWO52 = ix86_gen_TWO52 (mode);
30245 /* Temporary for holding the result, initialized to the input
30246 operand to ease control flow. */
30247 res = gen_reg_rtx (mode);
30248 emit_move_insn (res, operand1);
30250 /* xa = abs (operand1) */
30251 xa = ix86_expand_sse_fabs (res, &mask);
30253 /* if (!isless (xa, TWO52)) goto label; */
30254 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30256 /* x = (double)(long)x */
30257 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30258 expand_fix (xi, res, 0);
30259 expand_float (res, xi, 0);
30261 if (HONOR_SIGNED_ZEROS (mode))
30262 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30264 emit_label (label);
30265 LABEL_NUSES (label) = 1;
30267 emit_move_insn (operand0, res);
30270 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30273 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30275 enum machine_mode mode = GET_MODE (operand0);
30276 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30278 /* C code for SSE variant we expand below.
30279 double xa = fabs (x), x2;
30280 if (!isless (xa, TWO52))
        return x;
30282 xa2 = xa + TWO52 - TWO52;
      if (xa2 > xa)
        xa2 -= 1;
30286 x2 = copysign (xa2, x);
      return x2;
30290 TWO52 = ix86_gen_TWO52 (mode);
30292 /* Temporary for holding the result, initialized to the input
30293 operand to ease control flow. */
30294 res = gen_reg_rtx (mode);
30295 emit_move_insn (res, operand1);
30297 /* xa = abs (operand1) */
30298 xa = ix86_expand_sse_fabs (res, &smask);
30300 /* if (!isless (xa, TWO52)) goto label; */
30301 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30303 /* res = xa + TWO52 - TWO52; */
30304 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30305 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30306 emit_move_insn (res, tmp);
30309 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30311 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30312 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30313 emit_insn (gen_rtx_SET (VOIDmode, mask,
30314 gen_rtx_AND (mode, mask, one)));
30315 tmp = expand_simple_binop (mode, MINUS,
30316 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30317 emit_move_insn (res, tmp);
30319 /* res = copysign (res, operand1) */
30320 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30322 emit_label (label);
30323 LABEL_NUSES (label) = 1;
30325 emit_move_insn (operand0, res);
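/* A minimal scalar sketch of the sequence above (illustrative only):

     double
     trunc_df32 (double x)
     {
       double xa = fabs (x);
       if (!(xa < 0x1p52))
         return x;
       double xa2 = xa + 0x1p52 - 0x1p52;   /* Round to nearest.  */
       if (xa2 > xa)
         xa2 -= 1.0;   /* Rounded up: undo it to truncate.  */
       return copysign (xa2, x);
     }
*/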
30328 /* Expand SSE sequence for computing round from OPERAND1 storing
30331 ix86_expand_round (rtx operand0, rtx operand1)
30333 /* C code for the stuff we're doing below:
30334 double xa = fabs (x);
30335 if (!isless (xa, TWO52))
        return x;
30337 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30338 return copysign (xa, x);
30340 enum machine_mode mode = GET_MODE (operand0);
30341 rtx res, TWO52, xa, label, xi, half, mask;
30342 const struct real_format *fmt;
30343 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30345 /* Temporary for holding the result, initialized to the input
30346 operand to ease control flow. */
30347 res = gen_reg_rtx (mode);
30348 emit_move_insn (res, operand1);
30350 TWO52 = ix86_gen_TWO52 (mode);
30351 xa = ix86_expand_sse_fabs (res, &mask);
30352 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30354 /* load nextafter (0.5, 0.0) */
30355 fmt = REAL_MODE_FORMAT (mode);
30356 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30357 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30359 /* xa = xa + 0.5 */
30360 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30361 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30363 /* xa = (double)(int64_t)xa */
30364 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30365 expand_fix (xi, xa, 0);
30366 expand_float (xa, xi, 0);
30368 /* res = copysign (xa, operand1) */
30369 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30371 emit_label (label);
30372 LABEL_NUSES (label) = 1;
30374 emit_move_insn (operand0, res);
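/* Worked example of why the sequence above adds nextafter (0.5, 0.0)
   rather than 0.5 (illustrative only): for x = 0.5 - 2**-54, the
   largest double below 0.5, x + 0.5 lands exactly halfway between
   1.0 - 2**-53 and 1.0 and rounds up to 1.0 under round-to-nearest-even,
   so truncation would return 1 instead of round (x) == 0.  Adding
   0.5 - 2**-54 instead gives exactly 1.0 - 2**-53, and truncation
   correctly returns 0.  */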
30378 /* Table of valid machine attributes. */
30379 static const struct attribute_spec ix86_attribute_table[] =
30381 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30382 /* Stdcall attribute says callee is responsible for popping arguments
30383 if they are not variable. */
30384 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30385 /* Fastcall attribute says callee is responsible for popping arguments
30386 if they are not variable. */
30387 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30388 /* Thiscall attribute says callee is responsible for popping arguments
30389 if they are not variable. */
30390 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30391 /* Cdecl attribute says the callee is a normal C declaration */
30392 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30393 /* Regparm attribute specifies how many integer arguments are to be
30394 passed in registers. */
30395 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30396 /* Sseregparm attribute says we are using x86_64 calling conventions
30397 for FP arguments. */
30398 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30399 /* force_align_arg_pointer says this function realigns the stack at entry. */
30400 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30401 false, true, true, ix86_handle_cconv_attribute },
30402 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30403 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30404 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30405 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30407 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30408 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30409 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30410 SUBTARGET_ATTRIBUTE_TABLE,
30412 /* ms_abi and sysv_abi calling convention function attributes. */
30413 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30414 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30415 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30417 { NULL, 0, 0, false, false, false, NULL }
30420 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30422 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30423 tree vectype ATTRIBUTE_UNUSED,
30424 int misalign ATTRIBUTE_UNUSED)
30426 switch (type_of_cost)
30429 return ix86_cost->scalar_stmt_cost;
30432 return ix86_cost->scalar_load_cost;
30435 return ix86_cost->scalar_store_cost;
30438 return ix86_cost->vec_stmt_cost;
30441 return ix86_cost->vec_align_load_cost;
30444 return ix86_cost->vec_store_cost;
30446 case vec_to_scalar:
30447 return ix86_cost->vec_to_scalar_cost;
30449 case scalar_to_vec:
30450 return ix86_cost->scalar_to_vec_cost;
30452 case unaligned_load:
30453 case unaligned_store:
30454 return ix86_cost->vec_unalign_load_cost;
30456 case cond_branch_taken:
30457 return ix86_cost->cond_taken_branch_cost;
30459 case cond_branch_not_taken:
30460 return ix86_cost->cond_not_taken_branch_cost;
30466 gcc_unreachable ();
30471 /* Implement targetm.vectorize.builtin_vec_perm. */
30474 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30476 tree itype = TREE_TYPE (vec_type);
30477 bool u = TYPE_UNSIGNED (itype);
30478 enum machine_mode vmode = TYPE_MODE (vec_type);
30479 enum ix86_builtins fcode;
30480 bool ok = TARGET_SSE2;
30486 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30489 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30491 itype = ix86_get_builtin_type (IX86_BT_DI);
30496 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30500 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30502 itype = ix86_get_builtin_type (IX86_BT_SI);
30506 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30509 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30512 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30515 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30525 *mask_type = itype;
30526 return ix86_builtins[(int) fcode];
30529 /* Return a vector mode with twice as many elements as VMODE. */
30530 /* ??? Consider moving this to a table generated by genmodes.c. */
30532 static enum machine_mode
30533 doublesize_vector_mode (enum machine_mode vmode)
30537 case V2SFmode: return V4SFmode;
30538 case V1DImode: return V2DImode;
30539 case V2SImode: return V4SImode;
30540 case V4HImode: return V8HImode;
30541 case V8QImode: return V16QImode;
30543 case V2DFmode: return V4DFmode;
30544 case V4SFmode: return V8SFmode;
30545 case V2DImode: return V4DImode;
30546 case V4SImode: return V8SImode;
30547 case V8HImode: return V16HImode;
30548 case V16QImode: return V32QImode;
30550 case V4DFmode: return V8DFmode;
30551 case V8SFmode: return V16SFmode;
30552 case V4DImode: return V8DImode;
30553 case V8SImode: return V16SImode;
30554 case V16HImode: return V32HImode;
30555 case V32QImode: return V64QImode;
30558 gcc_unreachable ();
30562 /* Construct (set target (vec_select op0 (parallel perm))) and
30563 return true if that's a valid instruction in the active ISA. */
30566 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30568 rtx rperm[MAX_VECT_LEN], x;
30571 for (i = 0; i < nelt; ++i)
30572 rperm[i] = GEN_INT (perm[i]);
30574 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30575 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30576 x = gen_rtx_SET (VOIDmode, target, x);
30579 if (recog_memoized (x) < 0)
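/* For example (illustrative only), nelt == 4 with perm = { 1, 0, 3, 2 }
   on a V4SF target builds

     (set (reg:V4SF target)
          (vec_select:V4SF (reg:V4SF op0)
                           (parallel [(const_int 1) (const_int 0)
                                      (const_int 3) (const_int 2)])))

   which recog accepts only if an enabled pattern (here e.g. shufps)
   matches that selector.  */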
30587 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30590 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30591 const unsigned char *perm, unsigned nelt)
30593 enum machine_mode v2mode;
30596 v2mode = doublesize_vector_mode (GET_MODE (op0));
30597 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30598 return expand_vselect (target, x, perm, nelt);
30601 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30602 in terms of blendp[sd] / pblendw / pblendvb. */
30605 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30607 enum machine_mode vmode = d->vmode;
30608 unsigned i, mask, nelt = d->nelt;
30609 rtx target, op0, op1, x;
30611 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30613 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30616 /* This is a blend, not a permute. Elements must stay in their
30617 respective lanes. */
30618 for (i = 0; i < nelt; ++i)
30620 unsigned e = d->perm[i];
30621 if (!(e == i || e == i + nelt))
30628 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30629 decision should be extracted elsewhere, so that we only try that
30630 sequence once all budget==3 options have been tried. */
30632 /* For bytes, see if bytes move in pairs so we can use pblendw with
30633 an immediate argument, rather than pblendvb with a vector argument. */
30634 if (vmode == V16QImode)
30636 bool pblendw_ok = true;
30637 for (i = 0; i < 16 && pblendw_ok; i += 2)
30638 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30642 rtx rperm[16], vperm;
30644 for (i = 0; i < nelt; ++i)
30645 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30647 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30648 vperm = force_reg (V16QImode, vperm);
30650 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30655 target = d->target;
30667 for (i = 0; i < nelt; ++i)
30668 mask |= (d->perm[i] >= nelt) << i;
30672 for (i = 0; i < 2; ++i)
30673 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30677 for (i = 0; i < 4; ++i)
30678 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30682 for (i = 0; i < 8; ++i)
30683 mask |= (d->perm[i * 2] >= 16) << i;
30687 target = gen_lowpart (vmode, target);
30688 op0 = gen_lowpart (vmode, op0);
30689 op1 = gen_lowpart (vmode, op1);
30693 gcc_unreachable ();
30696 /* This matches five different patterns for the different modes. */
30697 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30698 x = gen_rtx_SET (VOIDmode, target, x);
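/* Worked example of the mask construction above (illustrative only):
   for V8HImode with perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, every odd
   element comes from op1 (perm[i] >= nelt), so mask = 0b10101010 = 0xaa
   and the emitted insn is a pblendw with that immediate.  */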
30704 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30705 in terms of the variable form of vpermilps.
30707 Note that we will have already failed the immediate input vpermilps,
30708 which requires that the high and low part shuffle be identical; the
30709 variable form doesn't require that. */
30712 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30714 rtx rperm[8], vperm;
30717 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30720 /* We can only permute within the 128-bit lane. */
30721 for (i = 0; i < 8; ++i)
30723 unsigned e = d->perm[i];
30724 if (i < 4 ? e >= 4 : e < 4)
30731 for (i = 0; i < 8; ++i)
30733 unsigned e = d->perm[i];
30735 /* Within each 128-bit lane, the elements of op0 are numbered
30736 from 0 and the elements of op1 are numbered from 4. */
30742 rperm[i] = GEN_INT (e);
30745 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30746 vperm = force_reg (V8SImode, vperm);
30747 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30752 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30753 in terms of pshufb or vpperm. */
30756 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30758 unsigned i, nelt, eltsz;
30759 rtx rperm[16], vperm, target, op0, op1;
30761 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30763 if (GET_MODE_SIZE (d->vmode) != 16)
30770 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30772 for (i = 0; i < nelt; ++i)
30774 unsigned j, e = d->perm[i];
30775 for (j = 0; j < eltsz; ++j)
30776 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30779 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30780 vperm = force_reg (V16QImode, vperm);
30782 target = gen_lowpart (V16QImode, d->target);
30783 op0 = gen_lowpart (V16QImode, d->op0);
30784 if (d->op0 == d->op1)
30785 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30788 op1 = gen_lowpart (V16QImode, d->op1);
30789 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
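/* Worked example of the selector above (illustrative only): for
   V8HImode, eltsz == 2, so element e expands to the byte pair
   { e*2, e*2+1 }.  With d->perm starting { 3, 2, 1, 0, ... } the first
   pshufb selector bytes are 6 7 4 5 2 3 0 1, moving each 16-bit element
   as an adjacent pair of bytes.  */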
30795 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30796 in a single instruction. */
30799 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30801 unsigned i, nelt = d->nelt;
30802 unsigned char perm2[MAX_VECT_LEN];
30804 /* Check plain VEC_SELECT first, because AVX has instructions that could
30805 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30806 input where SEL+CONCAT may not. */
30807 if (d->op0 == d->op1)
30809 int mask = nelt - 1;
30811 for (i = 0; i < nelt; i++)
30812 perm2[i] = d->perm[i] & mask;
30814 if (expand_vselect (d->target, d->op0, perm2, nelt))
30817 /* There are plenty of patterns in sse.md that are written for
30818 SEL+CONCAT and are not replicated for a single op. Perhaps
30819 that should be changed, to avoid the nastiness here. */
30821 /* Recognize interleave style patterns, which means incrementing
30822 every other permutation operand. */
30823 for (i = 0; i < nelt; i += 2)
30825 perm2[i] = d->perm[i] & mask;
30826 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30828 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30831 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30834 for (i = 0; i < nelt; i += 4)
30836 perm2[i + 0] = d->perm[i + 0] & mask;
30837 perm2[i + 1] = d->perm[i + 1] & mask;
30838 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30839 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30842 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30847 /* Finally, try the fully general two operand permute. */
30848 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30851 /* Recognize interleave style patterns with reversed operands. */
30852 if (d->op0 != d->op1)
30854 for (i = 0; i < nelt; ++i)
30856 unsigned e = d->perm[i];
30864 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30868 /* Try the SSE4.1 blend variable merge instructions. */
30869 if (expand_vec_perm_blend (d))
30872 /* Try one of the AVX vpermil variable permutations. */
30873 if (expand_vec_perm_vpermil (d))
30876 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30877 if (expand_vec_perm_pshufb (d))
30883 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30884 in terms of a pair of pshuflw + pshufhw instructions. */
30887 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30889 unsigned char perm2[MAX_VECT_LEN];
30893 if (d->vmode != V8HImode || d->op0 != d->op1)
30896 /* The two permutations only operate in 64-bit lanes. */
30897 for (i = 0; i < 4; ++i)
30898 if (d->perm[i] >= 4)
30900 for (i = 4; i < 8; ++i)
30901 if (d->perm[i] < 4)
30907 /* Emit the pshuflw. */
30908 memcpy (perm2, d->perm, 4);
30909 for (i = 4; i < 8; ++i)
30911 ok = expand_vselect (d->target, d->op0, perm2, 8);
30914 /* Emit the pshufhw. */
30915 memcpy (perm2 + 4, d->perm + 4, 4);
30916 for (i = 0; i < 4; ++i)
30918 ok = expand_vselect (d->target, d->target, perm2, 8);
30924 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30925 the permutation using the SSSE3 palignr instruction. This succeeds
30926 when all of the elements in PERM fit within one vector and we merely
30927 need to shift them down so that a single vector permutation has a
30928 chance to succeed. */
30931 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30933 unsigned i, nelt = d->nelt;
30938 /* Even with AVX, palignr only operates on 128-bit vectors. */
30939 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30942 min = nelt, max = 0;
30943 for (i = 0; i < nelt; ++i)
30945 unsigned e = d->perm[i];
30951 if (min == 0 || max - min >= nelt)
30954 /* Given that we have SSSE3, we know we'll be able to implement the
30955 single operand permutation after the palignr with pshufb. */
30959 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30960 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30961 gen_lowpart (TImode, d->op1),
30962 gen_lowpart (TImode, d->op0), shift));
30964 d->op0 = d->op1 = d->target;
30967 for (i = 0; i < nelt; ++i)
30969 unsigned e = d->perm[i] - min;
30975 /* Test for the degenerate case where the alignment by itself
30976 produces the desired permutation. */
30980 ok = expand_vec_perm_1 (d);
30986 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30987 a two vector permutation into a single vector permutation by using
30988 an interleave operation to merge the vectors. */
30991 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30993 struct expand_vec_perm_d dremap, dfinal;
30994 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30995 unsigned contents, h1, h2, h3, h4;
30996 unsigned char remap[2 * MAX_VECT_LEN];
31000 if (d->op0 == d->op1)
31003 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31004 lanes. We can use similar techniques with the vperm2f128 instruction,
31005 but it requires slightly different logic. */
31006 if (GET_MODE_SIZE (d->vmode) != 16)
31009 /* Examine from whence the elements come. */
31011 for (i = 0; i < nelt; ++i)
31012 contents |= 1u << d->perm[i];
31014 /* Split the two input vectors into 4 halves. */
31015 h1 = (1u << nelt2) - 1;
31020 memset (remap, 0xff, sizeof (remap));
31023 /* If all the elements come from the low halves, use interleave low, and
31024 similarly for interleave high.  If the elements are from mis-matched halves, we
31025 can use shufps for V4SF/V4SI or do a DImode shuffle. */
31026 if ((contents & (h1 | h3)) == contents)
31028 for (i = 0; i < nelt2; ++i)
31031 remap[i + nelt] = i * 2 + 1;
31032 dremap.perm[i * 2] = i;
31033 dremap.perm[i * 2 + 1] = i + nelt;
31036 else if ((contents & (h2 | h4)) == contents)
31038 for (i = 0; i < nelt2; ++i)
31040 remap[i + nelt2] = i * 2;
31041 remap[i + nelt + nelt2] = i * 2 + 1;
31042 dremap.perm[i * 2] = i + nelt2;
31043 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31046 else if ((contents & (h1 | h4)) == contents)
31048 for (i = 0; i < nelt2; ++i)
31051 remap[i + nelt + nelt2] = i + nelt2;
31052 dremap.perm[i] = i;
31053 dremap.perm[i + nelt2] = i + nelt + nelt2;
31057 dremap.vmode = V2DImode;
31059 dremap.perm[0] = 0;
31060 dremap.perm[1] = 3;
31063 else if ((contents & (h2 | h3)) == contents)
31065 for (i = 0; i < nelt2; ++i)
31067 remap[i + nelt2] = i;
31068 remap[i + nelt] = i + nelt2;
31069 dremap.perm[i] = i + nelt2;
31070 dremap.perm[i + nelt2] = i + nelt;
31074 dremap.vmode = V2DImode;
31076 dremap.perm[0] = 1;
31077 dremap.perm[1] = 2;
31083 /* Use the remapping array set up above to move the elements from their
31084 swizzled locations into their final destinations. */
31086 for (i = 0; i < nelt; ++i)
31088 unsigned e = remap[d->perm[i]];
31089 gcc_assert (e < nelt);
31090 dfinal.perm[i] = e;
31092 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31093 dfinal.op1 = dfinal.op0;
31094 dremap.target = dfinal.op0;
31096 /* Test if the final remap can be done with a single insn. For V4SFmode or
31097 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31099 ok = expand_vec_perm_1 (&dfinal);
31100 seq = get_insns ();
31106 if (dremap.vmode != dfinal.vmode)
31108 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31109 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31110 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31113 ok = expand_vec_perm_1 (&dremap);
31120 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31121 permutation with two pshufb insns and an ior. We should have already
31122 failed all two-instruction sequences. */
31125 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31127 rtx rperm[2][16], vperm, l, h, op, m128;
31128 unsigned int i, nelt, eltsz;
31130 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31132 gcc_assert (d->op0 != d->op1);
31135 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31137 /* Generate two permutation masks. If the required element is within
31138 the given vector it is shuffled into the proper lane. If the required
31139 element is in the other vector, force a zero into the lane by setting
31140 bit 7 in the permutation mask. */
31141 m128 = GEN_INT (-128);
31142 for (i = 0; i < nelt; ++i)
31144 unsigned j, e = d->perm[i];
31145 unsigned which = (e >= nelt);
31149 for (j = 0; j < eltsz; ++j)
31151 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31152 rperm[1-which][i*eltsz + j] = m128;
31156 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31157 vperm = force_reg (V16QImode, vperm);
31159 l = gen_reg_rtx (V16QImode);
31160 op = gen_lowpart (V16QImode, d->op0);
31161 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31163 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31164 vperm = force_reg (V16QImode, vperm);
31166 h = gen_reg_rtx (V16QImode);
31167 op = gen_lowpart (V16QImode, d->op1);
31168 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31170 op = gen_lowpart (V16QImode, d->target);
31171 emit_insn (gen_iorv16qi3 (op, l, h));
31176 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31177 and extract-odd permutations. */
31180 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31182 rtx t1, t2, t3, t4;
31187 t1 = gen_reg_rtx (V4DFmode);
31188 t2 = gen_reg_rtx (V4DFmode);
31190 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31191 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31192 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31194 /* Now an unpck[lh]pd will produce the result required. */
31196 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31198 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31204 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
31205 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
31206 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
31208 t1 = gen_reg_rtx (V8SFmode);
31209 t2 = gen_reg_rtx (V8SFmode);
31210 t3 = gen_reg_rtx (V8SFmode);
31211 t4 = gen_reg_rtx (V8SFmode);
31213 /* Shuffle within the 128-bit lanes to produce:
31214 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
31215 expand_vselect (t1, d->op0, perm1, 8);
31216 expand_vselect (t2, d->op1, perm1, 8);
31218 /* Shuffle the lanes around to produce:
31219 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
31220 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
31221 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
31223 /* Now a vpermil2p will produce the result required. */
31224 /* ??? The vpermil2p requires a vector constant. Another option
31225 is a unpck[lh]ps to merge the two vectors to produce
31226 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
31227 vpermilps to get the elements into the final order. */
31230 memcpy (d->perm, odd ? permo: perme, 8);
31231 expand_vec_perm_vpermil (d);
31239 /* These are always directly implementable by expand_vec_perm_1. */
31240 gcc_unreachable ();
31244 return expand_vec_perm_pshufb2 (d);
31247 /* We need 2*log2(N)-1 operations to achieve odd/even
31248 with interleave. */
31249 t1 = gen_reg_rtx (V8HImode);
31250 t2 = gen_reg_rtx (V8HImode);
31251 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31252 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31253 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31254 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31256 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31258 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31265 return expand_vec_perm_pshufb2 (d);
31268 t1 = gen_reg_rtx (V16QImode);
31269 t2 = gen_reg_rtx (V16QImode);
31270 t3 = gen_reg_rtx (V16QImode);
31271 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31272 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31273 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31274 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31275 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31276 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31278 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31280 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31286 gcc_unreachable ();
31292 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31293 extract-even and extract-odd permutations. */
31296 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31298 unsigned i, odd, nelt = d->nelt;
31301 if (odd != 0 && odd != 1)
31304 for (i = 1; i < nelt; ++i)
31305 if (d->perm[i] != 2 * i + odd)
31308 return expand_vec_perm_even_odd_1 (d, odd);
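/* For example (illustrative only), with nelt == 4 this accepts exactly
   perm = { 0, 2, 4, 6 } (extract even, odd == 0) and
   perm = { 1, 3, 5, 7 } (extract odd, odd == 1).  */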
31311 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31312 permutations. We assume that expand_vec_perm_1 has already failed. */
31315 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31317 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31318 enum machine_mode vmode = d->vmode;
31319 unsigned char perm2[4];
31327 /* These are special-cased in sse.md so that we can optionally
31328 use the vbroadcast instruction. They expand to two insns
31329 if the input happens to be in a register. */
31330 gcc_unreachable ();
31336 /* These are always implementable using standard shuffle patterns. */
31337 gcc_unreachable ();
31341 /* These can be implemented via interleave. We save one insn by
31342 stopping once we have promoted to V4SImode and then use pshufd. */
31345 optab otab = vec_interleave_low_optab;
31349 otab = vec_interleave_high_optab;
31354 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31355 vmode = get_mode_wider_vector (vmode);
31356 op0 = gen_lowpart (vmode, op0);
31358 while (vmode != V4SImode);
31360 memset (perm2, elt, 4);
31361 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31366 gcc_unreachable ();
31370 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31371 broadcast permutations. */
31374 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31376 unsigned i, elt, nelt = d->nelt;
31378 if (d->op0 != d->op1)
31382 for (i = 1; i < nelt; ++i)
31383 if (d->perm[i] != elt)
31386 return expand_vec_perm_broadcast_1 (d);
31389 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31390 With all of the interface bits taken care of, perform the expansion
31391 in D and return true on success. */
31394 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31396 /* Try a single instruction expansion. */
31397 if (expand_vec_perm_1 (d))
31400 /* Try sequences of two instructions. */
31402 if (expand_vec_perm_pshuflw_pshufhw (d))
31405 if (expand_vec_perm_palignr (d))
31408 if (expand_vec_perm_interleave2 (d))
31411 if (expand_vec_perm_broadcast (d))
31414 /* Try sequences of three instructions. */
31416 if (expand_vec_perm_pshufb2 (d))
31419 /* ??? Look for narrow permutations whose element orderings would
31420 allow the promotion to a wider mode. */
31422 /* ??? Look for sequences of interleave or a wider permute that place
31423 the data into the correct lanes for a half-vector shuffle like
31424 pshuf[lh]w or vpermilps. */
31426 /* ??? Look for sequences of interleave that produce the desired results.
31427 The combinatorics of punpck[lh] get pretty ugly... */
31429 if (expand_vec_perm_even_odd (d))
31435 /* Extract the values from the vector CST into the permutation array in D.
31436 Return 0 on error, 1 if all values from the permutation come from the
31437 first vector, 2 if all values from the second vector, and 3 otherwise. */
31440 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31442 tree list = TREE_VECTOR_CST_ELTS (cst);
31443 unsigned i, nelt = d->nelt;
31446 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31448 unsigned HOST_WIDE_INT e;
31450 if (!host_integerp (TREE_VALUE (list), 1))
31452 e = tree_low_cst (TREE_VALUE (list), 1);
31456 ret |= (e < nelt ? 1 : 2);
31459 gcc_assert (list == NULL);
31461 /* For all elements from the second vector, fold the elements to the first. */
31463 for (i = 0; i < nelt; ++i)
31464 d->perm[i] -= nelt;
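/* For example (illustrative only), with nelt == 4:
     mask { 0, 1, 2, 3 } -> returns 1, d->perm = { 0, 1, 2, 3 }
     mask { 4, 5, 6, 7 } -> returns 2, d->perm folded to { 0, 1, 2, 3 }
     mask { 0, 4, 1, 5 } -> returns 3, d->perm = { 0, 4, 1, 5 }  */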
31470 ix86_expand_vec_perm_builtin (tree exp)
31472 struct expand_vec_perm_d d;
31473 tree arg0, arg1, arg2;
31475 arg0 = CALL_EXPR_ARG (exp, 0);
31476 arg1 = CALL_EXPR_ARG (exp, 1);
31477 arg2 = CALL_EXPR_ARG (exp, 2);
31479 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31480 d.nelt = GET_MODE_NUNITS (d.vmode);
31481 d.testing_p = false;
31482 gcc_assert (VECTOR_MODE_P (d.vmode));
31484 if (TREE_CODE (arg2) != VECTOR_CST)
31486 error_at (EXPR_LOCATION (exp),
31487 "vector permutation requires vector constant");
31491 switch (extract_vec_perm_cst (&d, arg2))
31497 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31501 if (!operand_equal_p (arg0, arg1, 0))
31503 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31504 d.op0 = force_reg (d.vmode, d.op0);
31505 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31506 d.op1 = force_reg (d.vmode, d.op1);
31510 /* The elements of PERM do not suggest that only the first operand
31511 is used, but both operands are identical. Allow easier matching
31512 of the permutation by folding the permutation into the single input vector.
31515 unsigned i, nelt = d.nelt;
31516 for (i = 0; i < nelt; ++i)
31517 if (d.perm[i] >= nelt)
31523 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31524 d.op0 = force_reg (d.vmode, d.op0);
31529 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31530 d.op0 = force_reg (d.vmode, d.op0);
31535 d.target = gen_reg_rtx (d.vmode);
31536 if (ix86_expand_vec_perm_builtin_1 (&d))
31539 /* For compiler generated permutations, we should never get here, because
31540 the compiler should also be checking the ok hook. But since this is a
31541 builtin the user has access to, don't abort. */
31545 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31548 sorry ("vector permutation (%d %d %d %d)",
31549 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31552 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31553 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31554 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31557 sorry ("vector permutation "
31558 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31559 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31560 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31561 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31562 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31565 gcc_unreachable ();
31568 return CONST0_RTX (d.vmode);
31571 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31574 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31576 struct expand_vec_perm_d d;
31580 d.vmode = TYPE_MODE (vec_type);
31581 d.nelt = GET_MODE_NUNITS (d.vmode);
31582 d.testing_p = true;
31584 /* Given sufficient ISA support we can just return true here
31585 for selected vector modes. */
31586 if (GET_MODE_SIZE (d.vmode) == 16)
31588 /* All implementable with a single vpperm insn. */
31591 /* All implementable with 2 pshufb + 1 ior. */
31594 /* All implementable with shufpd or unpck[lh]pd. */
31599 vec_mask = extract_vec_perm_cst (&d, mask);
31601 /* This hook cannot be called in response to something that the
31602 user does (unlike the builtin expander) so we shouldn't ever see
31603 an error generated from the extract. */
31604 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31605 one_vec = (vec_mask != 3);
31607 /* Implementable with shufps or pshufd. */
31608 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31611 /* Otherwise we have to go through the motions and see if we can
31612 figure out how to generate the requested permutation. */
31613 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31614 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31616 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31619 ret = ix86_expand_vec_perm_builtin_1 (&d);
31626 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31628 struct expand_vec_perm_d d;
31634 d.vmode = GET_MODE (targ);
31635 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31636 d.testing_p = false;
31638 for (i = 0; i < nelt; ++i)
31639 d.perm[i] = i * 2 + odd;
31641 /* We'll either be able to implement the permutation directly... */
31642 if (expand_vec_perm_1 (&d))
31645 /* ... or we use the special-case patterns. */
31646 expand_vec_perm_even_odd_1 (&d, odd);
31649 /* This function returns the calling-ABI-specific va_list type node.
31650 It returns the FNDECL-specific va_list type.
31653 ix86_fn_abi_va_list (tree fndecl)
31656 return va_list_type_node;
31657 gcc_assert (fndecl != NULL_TREE);
31659 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31660 return ms_va_list_type_node;
31662 return sysv_va_list_type_node;
31665 /* Returns the canonical va_list type specified by TYPE. If there
31666 is no valid TYPE provided, it returns NULL_TREE. */
31669 ix86_canonical_va_list_type (tree type)
31673 /* Resolve references and pointers to va_list type. */
31674 if (TREE_CODE (type) == MEM_REF)
31675 type = TREE_TYPE (type);
31676 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
31677 type = TREE_TYPE (type);
31678 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31679 type = TREE_TYPE (type);
31683 wtype = va_list_type_node;
31684 gcc_assert (wtype != NULL_TREE);
31686 if (TREE_CODE (wtype) == ARRAY_TYPE)
31688 /* If va_list is an array type, the argument may have decayed
31689 to a pointer type, e.g. by being passed to another function.
31690 In that case, unwrap both types so that we can compare the
31691 underlying records. */
31692 if (TREE_CODE (htype) == ARRAY_TYPE
31693 || POINTER_TYPE_P (htype))
31695 wtype = TREE_TYPE (wtype);
31696 htype = TREE_TYPE (htype);
31699 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31700 return va_list_type_node;
31701 wtype = sysv_va_list_type_node;
31702 gcc_assert (wtype != NULL_TREE);
31704 if (TREE_CODE (wtype) == ARRAY_TYPE)
31706 /* If va_list is an array type, the argument may have decayed
31707 to a pointer type, e.g. by being passed to another function.
31708 In that case, unwrap both types so that we can compare the
31709 underlying records. */
31710 if (TREE_CODE (htype) == ARRAY_TYPE
31711 || POINTER_TYPE_P (htype))
31713 wtype = TREE_TYPE (wtype);
31714 htype = TREE_TYPE (htype);
31717 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31718 return sysv_va_list_type_node;
31719 wtype = ms_va_list_type_node;
31720 gcc_assert (wtype != NULL_TREE);
31722 if (TREE_CODE (wtype) == ARRAY_TYPE)
31724 /* If va_list is an array type, the argument may have decayed
31725 to a pointer type, e.g. by being passed to another function.
31726 In that case, unwrap both types so that we can compare the
31727 underlying records. */
31728 if (TREE_CODE (htype) == ARRAY_TYPE
31729 || POINTER_TYPE_P (htype))
31731 wtype = TREE_TYPE (wtype);
31732 htype = TREE_TYPE (htype);
31735 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31736 return ms_va_list_type_node;
31739 return std_canonical_va_list_type (type);
31742 /* Iterate through the target-specific builtin types for va_list.
31743 IDX denotes the iterator, *PTREE is set to the result type of
31744 the va_list builtin, and *PNAME to its internal type.
31745 Returns zero if there is no element for this index, otherwise
31746 IDX should be increased upon the next call.
31747 Note, do not iterate a base builtin's name like __builtin_va_list.
31748 Used from c_common_nodes_and_builtins. */
31751 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31761 *ptree = ms_va_list_type_node;
31762 *pname = "__builtin_ms_va_list";
31766 *ptree = sysv_va_list_type_node;
31767 *pname = "__builtin_sysv_va_list";
31775 #undef TARGET_SCHED_DISPATCH
31776 #define TARGET_SCHED_DISPATCH has_dispatch
31777 #undef TARGET_SCHED_DISPATCH_DO
31778 #define TARGET_SCHED_DISPATCH_DO do_dispatch
31780 /* The size of the dispatch window is the total number of bytes of
31781 object code allowed in a window. */
31782 #define DISPATCH_WINDOW_SIZE 16
31784 /* Number of dispatch windows considered for scheduling. */
31785 #define MAX_DISPATCH_WINDOWS 3
31787 /* Maximum number of instructions in a window. */
31790 /* Maximum number of immediate operands in a window. */
31793 /* Maximum number of immediate bits allowed in a window. */
31794 #define MAX_IMM_SIZE 128
31796 /* Maximum number of 32 bit immediates allowed in a window. */
31797 #define MAX_IMM_32 4
31799 /* Maximum number of 64 bit immediates allowed in a window. */
31800 #define MAX_IMM_64 2
31802 /* Maximum total of loads or prefetches allowed in a window. */
31805 /* Maximum total of stores allowed in a window. */
31806 #define MAX_STORE 1
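/* For example (illustrative only): a window may therefore carry at most
   4 32-bit immediates (4 * 32 == MAX_IMM_SIZE bits), or 2 64-bit
   immediates, or a mix such as one 64-bit plus two 32-bit immediates,
   since a 64-bit immediate counts as two 32-bit slots.  */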
31812 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
31813 enum dispatch_group {
31828 /* Number of allowable groups in a dispatch window. It is an array
31829 indexed by dispatch_group enum. 100 is used as a big number,
31830 because the number of these kinds of operations does not have any
31831 effect in a dispatch window, but we need them for other reasons in the table.
31833 static unsigned int num_allowable_groups[disp_last] = {
31834 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
31837 char group_name[disp_last + 1][16] = {
31838 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
31839 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
31840 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
31843 /* Instruction path. */
31846 path_single, /* Single micro op. */
31847 path_double, /* Double micro op. */
31848 path_multi, /* Instructions with more than 2 micro ops. */
31852 /* sched_insn_info defines a window to the instructions scheduled in
31853 the basic block. It contains a pointer to the insn_info table and
31854 the instruction scheduled.
31856 Windows are allocated for each basic block and are linked together.
31858 typedef struct sched_insn_info_s {
31860 enum dispatch_group group;
31861 enum insn_path path;
31866 /* Linked list of dispatch windows. This is a two-way list of
31867 dispatch windows of a basic block. It contains information about
31868 the number of uops in the window and the total number of
31869 instructions and of bytes in the object code for this dispatch window.
31871 typedef struct dispatch_windows_s {
31872 int num_insn; /* Number of insn in the window. */
31873 int num_uops; /* Number of uops in the window. */
31874 int window_size; /* Number of bytes in the window. */
31875 int window_num; /* Window number, 0 or 1. */
31876 int num_imm; /* Number of immediates in an insn. */
31877 int num_imm_32; /* Number of 32 bit immediates in an insn. */
31878 int num_imm_64; /* Number of 64 bit immediates in an insn. */
31879 int imm_size; /* Total immediates in the window. */
31880 int num_loads; /* Total memory loads in the window. */
31881 int num_stores; /* Total memory stores in the window. */
31882 int violation; /* Violation exists in window. */
31883 sched_insn_info *window; /* Pointer to the window. */
31884 struct dispatch_windows_s *next;
31885 struct dispatch_windows_s *prev;
31886 } dispatch_windows;
31888 /* Immediate values used in an insn. */
31889 typedef struct imm_info_s
31896 static dispatch_windows *dispatch_window_list;
31897 static dispatch_windows *dispatch_window_list1;
31899 /* Get dispatch group of insn. */
31901 static enum dispatch_group
31902 get_mem_group (rtx insn)
31904 enum attr_memory memory;
31906 if (INSN_CODE (insn) < 0)
31907 return disp_no_group;
31908 memory = get_attr_memory (insn);
31909 if (memory == MEMORY_STORE)
31912 if (memory == MEMORY_LOAD)
31915 if (memory == MEMORY_BOTH)
31916 return disp_load_store;
31918 return disp_no_group;
31921 /* Return true if insn is a compare instruction. */
31926 enum attr_type type;
31928 type = get_attr_type (insn);
31929 return (type == TYPE_TEST
31930 || type == TYPE_ICMP
31931 || type == TYPE_FCMP
31932 || GET_CODE (PATTERN (insn)) == COMPARE);
31935 /* Return true if a dispatch violation was encountered. */
31938 dispatch_violation (void)
31940 if (dispatch_window_list->next)
31941 return dispatch_window_list->next->violation;
31942 return dispatch_window_list->violation;
31945 /* Return true if insn is a branch instruction. */
31948 is_branch (rtx insn)
31950 return (CALL_P (insn) || JUMP_P (insn));
31953 /* Return true if insn is a prefetch instruction. */
31956 is_prefetch (rtx insn)
31958 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
31961 /* This function initializes a dispatch window and the list container holding a
31962 pointer to the window. */
31965 init_window (int window_num)
31968 dispatch_windows *new_list;
31970 if (window_num == 0)
31971 new_list = dispatch_window_list;
31973 new_list = dispatch_window_list1;
31975 new_list->num_insn = 0;
31976 new_list->num_uops = 0;
31977 new_list->window_size = 0;
31978 new_list->next = NULL;
31979 new_list->prev = NULL;
31980 new_list->window_num = window_num;
31981 new_list->num_imm = 0;
31982 new_list->num_imm_32 = 0;
31983 new_list->num_imm_64 = 0;
31984 new_list->imm_size = 0;
31985 new_list->num_loads = 0;
31986 new_list->num_stores = 0;
31987 new_list->violation = false;
31989 for (i = 0; i < MAX_INSN; i++)
31991 new_list->window[i].insn = NULL;
31992 new_list->window[i].group = disp_no_group;
31993 new_list->window[i].path = no_path;
31994 new_list->window[i].byte_len = 0;
31995 new_list->window[i].imm_bytes = 0;
32000 /* This function allocates and initializes a dispatch window and the
32001 list container holding a pointer to the window. */
32003 static dispatch_windows *
32004 allocate_window (void)
32006 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32007 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32012 /* This routine initializes the dispatch scheduling information. It
32013 initiates building dispatch scheduler tables and constructs the
32014 first dispatch window. */
32017 init_dispatch_sched (void)
32019 /* Allocate a dispatch list and a window. */
32020 dispatch_window_list = allocate_window ();
32021 dispatch_window_list1 = allocate_window ();
32026 /* This function returns true if a branch is detected. End of a basic block
32027 does not have to be a branch, but here we assume only branches end a basic block.
32031 is_end_basic_block (enum dispatch_group group)
32033 return group == disp_branch;
32036 /* This function is called when the end of a window processing is reached. */
32039 process_end_window (void)
32041 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32042 if (dispatch_window_list->next)
32044 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32045 gcc_assert (dispatch_window_list->window_size
32046 + dispatch_window_list1->window_size <= 48);
32052 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32053 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32054 for 48 bytes of instructions. Note that these windows are not dispatch
32055 windows whose sizes are DISPATCH_WINDOW_SIZE. */
32057 static dispatch_windows *
32058 allocate_next_window (int window_num)
32060 if (window_num == 0)
32062 if (dispatch_window_list->next)
32065 return dispatch_window_list;
32068 dispatch_window_list->next = dispatch_window_list1;
32069 dispatch_window_list1->prev = dispatch_window_list;
32071 return dispatch_window_list1;
32074 /* Increment the number of immediate operands of an instruction. */
32077 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32082 switch (GET_CODE (*in_rtx))
32087 (imm_values->imm)++;
32088 if (x86_64_immediate_operand (*in_rtx, SImode))
32089 (imm_values->imm32)++;
32091 (imm_values->imm64)++;
32095 (imm_values->imm)++;
32096 (imm_values->imm64)++;
32100 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32102 (imm_values->imm)++;
32103 (imm_values->imm32)++;
32114 /* Compute number of immediate operands of an instruction. */
32117 find_constant (rtx in_rtx, imm_info *imm_values)
32119 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32120 (rtx_function) find_constant_1, (void *) imm_values);
32123 /* Return total size of immediate operands of an instruction along with number
32124 of corresponding immediate-operands. It initializes its parameters to zero
32125 before calling FIND_CONSTANT.
32126 INSN is the input instruction. IMM is the total of immediates.
32127 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64 bit immediates.
32131 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32133 imm_info imm_values = {0, 0, 0};
32135 find_constant (insn, &imm_values);
32136 *imm = imm_values.imm;
32137 *imm32 = imm_values.imm32;
32138 *imm64 = imm_values.imm64;
32139 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
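/* For example (illustrative only): an insn with one 32-bit and one
   64-bit immediate yields *imm == 2, *imm32 == 1, *imm64 == 1 and a
   return value of 1*4 + 1*8 == 12 bytes.  */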
32142 /* This function indicates if an operand of an instruction is an immediate.
32146 has_immediate (rtx insn)
32148 int num_imm_operand;
32149 int num_imm32_operand;
32150 int num_imm64_operand;
32153 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32154 &num_imm64_operand);
32158 /* Return single or double path for instructions. */
32160 static enum insn_path
32161 get_insn_path (rtx insn)
32163 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32165 if ((int)path == 0)
32166 return path_single;
32168 if ((int)path == 1)
32169 return path_double;
32174 /* Return insn dispatch group. */
32176 static enum dispatch_group
32177 get_insn_group (rtx insn)
32179 enum dispatch_group group = get_mem_group (insn);
32183 if (is_branch (insn))
32184 return disp_branch;
32189 if (has_immediate (insn))
32192 if (is_prefetch (insn))
32193 return disp_prefetch;
32195 return disp_no_group;
32198 /* Count number of GROUP restricted instructions in a dispatch
32199 window WINDOW_LIST. */
32202 count_num_restricted (rtx insn, dispatch_windows *window_list)
32204 enum dispatch_group group = get_insn_group (insn);
32206 int num_imm_operand;
32207 int num_imm32_operand;
32208 int num_imm64_operand;
32210 if (group == disp_no_group)
32213 if (group == disp_imm)
32215 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32216 &num_imm64_operand);
32217 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32218 || num_imm_operand + window_list->num_imm > MAX_IMM
32219 || (num_imm32_operand > 0
32220 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32221 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32222 || (num_imm64_operand > 0
32223 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32224 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32225 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32226 && num_imm64_operand > 0
32227 && ((window_list->num_imm_64 > 0
32228 && window_list->num_insn >= 2)
32229 || window_list->num_insn >= 3)))
32235 if ((group == disp_load_store
32236 && (window_list->num_loads >= MAX_LOAD
32237 || window_list->num_stores >= MAX_STORE))
32238 || ((group == disp_load
32239 || group == disp_prefetch)
32240 && window_list->num_loads >= MAX_LOAD)
32241 || (group == disp_store
32242 && window_list->num_stores >= MAX_STORE))
32248 /* This function returns true if insn satisfies dispatch rules on the
32249 last window scheduled. */
32252 fits_dispatch_window (rtx insn)
32254 dispatch_windows *window_list = dispatch_window_list;
32255 dispatch_windows *window_list_next = dispatch_window_list->next;
32256 unsigned int num_restrict;
32257 enum dispatch_group group = get_insn_group (insn);
32258 enum insn_path path = get_insn_path (insn);
32261 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32262 instructions should be given the lowest priority in the
32263 scheduling process in the Haifa scheduler to make sure they will be
32264 scheduled in the same dispatch window as the reference to them. */
32265 if (group == disp_jcc || group == disp_cmp)
32268 /* Check nonrestricted. */
32269 if (group == disp_no_group || group == disp_branch)
32272 /* Get last dispatch window. */
32273 if (window_list_next)
32274 window_list = window_list_next;
32276 if (window_list->window_num == 1)
32278 sum = window_list->prev->window_size + window_list->window_size;
32281 || (min_insn_size (insn) + sum) >= 48)
32282 /* Window 1 is full. Go for next window. */
32286 num_restrict = count_num_restricted (insn, window_list);
32288 if (num_restrict > num_allowable_groups[group])
32291 /* See if it fits in the first window. */
32292 if (window_list->window_num == 0)
32294 /* The first window should have only single and double path uops.
32296 if (path == path_double
32297 && (window_list->num_uops + 2) > MAX_INSN)
32299 else if (path != path_single)
32305 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32306 dispatch window WINDOW_LIST. */
32309 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
32311 int byte_len = min_insn_size (insn);
32312 int num_insn = window_list->num_insn;
32314 sched_insn_info *window = window_list->window;
32315 enum dispatch_group group = get_insn_group (insn);
32316 enum insn_path path = get_insn_path (insn);
32317 int num_imm_operand;
32318 int num_imm32_operand;
32319 int num_imm64_operand;
32321 if (!window_list->violation && group != disp_cmp
32322 && !fits_dispatch_window (insn))
32323 window_list->violation = true;
32325 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32326 &num_imm64_operand);
  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}

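/* Note: the running totals updated above (window_size, imm_size,
   num_imm_*, num_loads, num_stores) are the fields that
   count_num_restricted () consults when deciding whether a later insn
   still fits in this window.  */
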
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed what is allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If the current window is full, get a new window.
     Window number zero is full if MAX_INSN uops are scheduled in it.
     Window number one is full if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or if it already has
     MAX_INSN instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

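  /* Worked example for the toggle below: window_num is always 0 or 1,
     and ~0 & 1 == 1 while ~1 & 1 == 0, so ~window_num & 1 simply
     switches to the other dispatch window.  */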
  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}

/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

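/* DEBUG_FUNCTION marks these helpers as callable by hand from a
   debugger even though nothing in the compiler references them;
   e.g. "call debug_dispatch_window (0)" from gdb prints window 0.  */
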
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if dispatch scheduling is supported, or answer the more
   specific query in ACTION for INSN.  */

static bool
has_dispatch (rtx insn, int action)
{
  if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}

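/* The IS_DISPATCH_ON, IS_CMP, DISPATCH_VIOLATION and
   FITS_DISPATCH_WINDOW action codes are queries the scheduler makes
   while filling dispatch windows; dispatch scheduling is active only
   when tuning for PROCESSOR_BDVER1 with -mdispatch-scheduler
   (flag_dispatch_scheduler).  */
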
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static unsigned int
ix86_units_per_simd_word (enum machine_mode mode)
{
  /* Disable double precision vectorizer if needed.  */
  if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
    return UNITS_PER_WORD;

  if (TARGET_AVX)
    /* FIXME: AVX has 32-byte floating point vector operations and
       16-byte integer vector operations.  But the vectorizer doesn't
       support different sizes for integer and floating point vectors.
       We limit vector size to 16 bytes.  */
    return (mode == DFmode || mode == SFmode) ? 32 : 16;
  else
    return TARGET_SSE ? 16 : UNITS_PER_WORD;
}

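/* For reference: a return value of 16 allows full SSE-sized vectors
   (e.g. V4SF or V2DF), 32 allows full AVX-sized vectors (V8SF/V4DF),
   and UNITS_PER_WORD effectively restricts the vectorizer to
   word-sized units.  */
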
/* Initialize the GCC target structure.  */
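/* Each #undef/#define pair below overrides the default hook supplied
   by target-def.h; TARGET_INITIALIZER at the end of the file then
   collects all of these macros into the targetm vector.  */
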
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
   | MASK_FUSED_MADD)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
#undef TARGET_OPTION_OPTIMIZATION
#define TARGET_OPTION_OPTIMIZATION ix86_option_optimization

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
#define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD \
  ix86_units_per_simd_word

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

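/* TARGET_INITIALIZER (from target-def.h) expands to an initializer for
   struct gcc_target in which every hook macro redefined above replaces
   the corresponding default.  */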
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"