/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return the index of the given mode in the multiply and divide cost
   tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
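
/* Illustrative sketch (editorial, not part of the original file): how
   MODE_INDEX pairs with the five-entry cost arrays in the tables below.
   The field name `mult_init' is an assumption about the processor_costs
   layout.  */
#if 0
static int
example_mult_cost (const struct processor_costs *cost,
                   enum machine_mode mode)
{
  /* QImode..DImode map to indices 0..3; any other mode falls through
     to the "other" slot at index 4.  */
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif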
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
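
/* Worked example (editorial): with COSTS_N_INSNS (N) defined as (N) * 4,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte addition in
   the size table below carries the same weight as a one-insn addition
   does in the speed tables, keeping the two cost scales comparable.  */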
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
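
/* Reading the stringop_algs initializers below (editorial note): the
   first member is the algorithm for blocks of unknown size, followed by
   {max_size, algorithm} pairs; a max_size of -1 terminates the list and
   covers all remaining sizes.  For example,
     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}
   means: rep movsl for blocks up to 256 bytes, a library call beyond
   that, and a library call when the size is not known at compile time.  */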
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
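
/* Editorial sketch: the table above is the one meant for size tuning;
   the selection of the active table happens elsewhere in this file, and
   the exact guard shown here is an assumption:

     if (optimize_size)
       ix86_cost = &ix86_size_cost;  */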

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  The 486 has an 8kB
                                   cache shared for code and data, so 4kB
                                   is not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* The PentiumPro has optimized rep instructions for blocks aligned to
     8 bytes (we ensure the alignment).  For small blocks an inline loop
     is still a noticeable win; for bigger blocks either rep movsl or
     rep movsb is the way to go.  Rep movsb apparently has a more expensive
     startup time in the CPU, but after 4K the difference is down in the
     noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
    {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
    {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models have an
                                   integrated l2 cache, but optimizing for
                                   k6 is not important enough to worry
                                   about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, the Athlon deals better with the REP prefix (relative
     to loops) than the K8 does.  Alignment becomes important after 8 bytes
     for memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* The K8 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     a libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     a libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                    MOVD reg32, xmmreg Double FADD 3 */
  64,                           /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     a libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
    {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),          /* HI */
   COSTS_N_INSNS (22),          /* SI */
   COSTS_N_INSNS (22),          /* DI */
   COSTS_N_INSNS (22)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  16,                           /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {6, 6, 6},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {6, 6},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {6, 6, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     that cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
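
/* Illustrative sketch (editorial): option processing repoints ix86_cost
   at the table matching -mtune, and cost hooks then read its fields.
   The field name `add' follows the first entry of each initializer
   above.  */
#if 0
static int
example_add_cost (void)
{
  return ix86_cost->add;        /* speed-relative cost of an addition */
}
#endif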

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of the supported
   CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
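
/* Illustrative sketch (editorial): how a mask row from the array below
   typically turns into an ix86_tune_features entry; `ix86_tune' holding
   the active PROCESSOR_* value is an assumption about surrounding
   context.  */
#if 0
static void
example_init_tune_features (void)
{
  unsigned int tune_mask = 1U << ix86_tune;
  int i;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = (initial_ix86_tune_features[i] & tune_mask) != 0;
}
#endif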

/* Feature tests against the various tunings used to create
   ix86_tune_features based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4
     based on simulation results, but after the P4 was made no performance
     benefit was observed from branch hints, and they also increase code
     size.  As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well: they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and conflicts with the partial
     register dependencies used by Athlon/P4 based chips, it is better to
     leave it off for generic32 for now.  */
  m_PPRO,
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here in between PPro/Pentium4 based chips that treat 128bit
1509 SSE registers as single units versus K8 based chips that divide SSE
1510 registers into two 64bit halves. This knob promotes all store destinations
1511 to be 128bit to allow register renaming on 128bit SSE units, but usually
1512 results in one extra microop on 64bit SSE units. Experimental results
1513 show that disabling this option on P4 brings over a 20% SPECfp regression,
1514 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1515 masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
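/* (Illustration, not from this file: with this knob a scalar load such as
      movss  (%rax), %xmm0
    may instead be emitted as
      xorps  %xmm0, %xmm0
      movss  (%rax), %xmm0
    so the destination is written in full and the false dependency on the
    register's old upper bits is broken.)  */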
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format,
1531 leaving the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in a 16-byte window.
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1593 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1597 vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1601 machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1605 than a MOV. */
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1613 operand that cannot be represented using a modRM byte. The XOR
1614 replacement is long decoded, so this split helps here as well. */
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618 from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635 operations. */
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* In case the average insn count for a single function invocation is
1672 lower than this constant, emit fast (but longer) prologue and
1673 epilogue code. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1676 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
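#if 0
/* Illustration only -- how the two maps above are consumed.  The macro
   below mirrors DBX_REGISTER_NUMBER as defined in i386.h (assumed here,
   not part of this file):  */
#define DBX_REGISTER_NUMBER(n) \
  (TARGET_64BIT ? dbx64_register_map[n] : dbx_register_map[n])
#endif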
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative
1781 numbers:
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
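/* Worked example (SysV AMD64 ABI): for
     long f (long a, long b, long c);
   the arguments arrive in %rdi, %rsi and %rdx -- the first three entries
   of x86_64_int_parameter_registers -- and the result comes back in %rax,
   the first entry of x86_64_int_return_registers.  Under the MS ABI the
   same call would use %rcx, %rdx and %r8 instead.  */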
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1835 saved static chain if ix86_static_chain_on_stack
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1844 <- sse_regs_save_offset
1847 [va_arg registers] |
1851 [padding2] | = to_allocate
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which cpu are we scheduling for. */
1886 enum attr_cpu ix86_schedule;
1888 /* Which cpu are we optimizing for. */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* True if the SSE prefetch instruction is not a NOP. */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 static const char ix86_force_align_arg_pointer_string[]
1902 = "force_align_arg_pointer";
1904 static rtx (*ix86_gen_leave) (void);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
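/* These hooks are filled in at option-override time with the expander
   matching the word mode; a sketch of the (assumed) assignments:

     if (TARGET_64BIT)
       ix86_gen_add3 = gen_adddi3;
     else
       ix86_gen_add3 = gen_addsi3;

   Callers can then emit an add without testing TARGET_64BIT again.  */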
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1918 /* Alignment for incoming stack boundary in bits specified at
1919 the command line. */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1928 /* The abi used by target. */
1929 enum calling_abi ix86_abi;
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1934 /* Calling abi specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1941 int ix86_section_threshold = 65536;
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1947 /* Fence to use after loop using movnt. */
1950 /* Register class used for passing a given 64bit part of the argument.
1951 These represent classes as documented by the psABI, with the exception
1952 of the SSESF and SSEDF classes, which are basically the SSE class: gcc
1953 just uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
1955 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1956 whenever possible (the upper half does contain padding). */
1957 enum x86_64_reg_class
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1972 #define MAX_CLASSES 4
1974 /* Table of constants used by fldpi, fldln2, etc. */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1990 static void predict_jump (int);
1991 static unsigned int split_stack_prologue_scratch_regno (void);
1992 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1994 enum ix86_function_specific_strings
1996 IX86_FUNCTION_SPECIFIC_ARCH,
1997 IX86_FUNCTION_SPECIFIC_TUNE,
1998 IX86_FUNCTION_SPECIFIC_FPMATH,
1999 IX86_FUNCTION_SPECIFIC_MAX
2002 static char *ix86_target_string (int, int, const char *, const char *,
2003 const char *, bool);
2004 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2005 static void ix86_function_specific_save (struct cl_target_option *);
2006 static void ix86_function_specific_restore (struct cl_target_option *);
2007 static void ix86_function_specific_print (FILE *, int,
2008 struct cl_target_option *);
2009 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2010 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2011 static bool ix86_can_inline_p (tree, tree);
2012 static void ix86_set_current_function (tree);
2013 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2015 static enum calling_abi ix86_function_abi (const_tree);
2018 #ifndef SUBTARGET32_DEFAULT_CPU
2019 #define SUBTARGET32_DEFAULT_CPU "i386"
2022 /* The svr4 ABI for the i386 says that records and unions are returned
2023 in memory. */
2024 #ifndef DEFAULT_PCC_STRUCT_RETURN
2025 #define DEFAULT_PCC_STRUCT_RETURN 1
2028 /* Whether -mtune= or -march= were specified */
2029 static int ix86_tune_defaulted;
2030 static int ix86_arch_specified;
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2059 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2060 as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2073 /* AES and PCLMUL need SSE2 because they use xmm registers */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
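/* Worked example of the implication closure: OPTION_MASK_ISA_SSE4_2_SET
   expands to SSE4.2 | SSE4.1 | SSSE3 | SSE3 | SSE2 | SSE, so the single
   statement

     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;

   enables every ISA that -msse4.2 implies.  */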
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET )
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2121 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2122 as -mno-sse4.2. */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
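/* Conversely, the UNSET masks close over everything that depends on the
   ISA being switched off; for example

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;

   clears SSE2 together with SSE3, SSSE3, SSE4.1/4.2, SSE4A, AVX and
   their dependents, since none of them can work without SSE2.  */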
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2152 /* Processor target table, indexed by processor number */
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2210 /* Return true if a red-zone is in use. */
2213 ix86_using_red_zone (void)
2215 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
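/* Example: under the SysV AMD64 ABI a leaf function may freely use the
   128 bytes below %rsp without adjusting it; the MS ABI defines no such
   red zone, hence the !TARGET_64BIT_MS_ABI test above.  */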
2218 /* Implement TARGET_HANDLE_OPTION. */
2221 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2228 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2257 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2262 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2270 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2275 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2288 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2301 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2309 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2314 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2322 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2327 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2335 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2340 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2348 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2353 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2364 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2376 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2384 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2389 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2397 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2402 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2410 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2415 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2423 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2428 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2436 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2441 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2449 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2454 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2462 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2467 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2475 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2480 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2488 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2493 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2501 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2506 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2514 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2519 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2527 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2532 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2540 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2545 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2553 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2558 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2559 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2568 /* Return a string that documents the current -m options. The caller is
2569 responsible for freeing the string. */
2572 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2573 const char *fpmath, bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 int mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2 that imply
2582 preceding options will match those first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-m64", OPTION_MASK_ISA_64BIT },
2586 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2587 { "-mfma", OPTION_MASK_ISA_FMA },
2588 { "-mxop", OPTION_MASK_ISA_XOP },
2589 { "-mlwp", OPTION_MASK_ISA_LWP },
2590 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2591 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2592 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2593 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2594 { "-msse3", OPTION_MASK_ISA_SSE3 },
2595 { "-msse2", OPTION_MASK_ISA_SSE2 },
2596 { "-msse", OPTION_MASK_ISA_SSE },
2597 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2598 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2599 { "-mmmx", OPTION_MASK_ISA_MMX },
2600 { "-mabm", OPTION_MASK_ISA_ABM },
2601 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2602 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2603 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2604 { "-maes", OPTION_MASK_ISA_AES },
2605 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2606 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2607 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2608 { "-mf16c", OPTION_MASK_ISA_F16C },
2612 static struct ix86_target_opts flag_opts[] =
2614 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2615 { "-m80387", MASK_80387 },
2616 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2617 { "-malign-double", MASK_ALIGN_DOUBLE },
2618 { "-mcld", MASK_CLD },
2619 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2620 { "-mieee-fp", MASK_IEEE_FP },
2621 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2622 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2623 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2624 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2625 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2626 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2627 { "-mno-red-zone", MASK_NO_RED_ZONE },
2628 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2629 { "-mrecip", MASK_RECIP },
2630 { "-mrtd", MASK_RTD },
2631 { "-msseregparm", MASK_SSEREGPARM },
2632 { "-mstack-arg-probe", MASK_STACK_PROBE },
2633 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2634 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2641 char target_other[40];
2650 memset (opts, '\0', sizeof (opts));
2652 /* Add -march= option. */
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2659 /* Add -mtune= option. */
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2666 /* Pick out the options in isa options. */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2669 if ((isa & isa_opts[i].mask) != 0)
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2676 if (isa && add_nl_p)
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2685 if ((flags & flag_opts[i].mask) != 0)
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2692 if (flags && add_nl_p)
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2698 /* Add -fpmath= option. */
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2709 gcc_assert (num < ARRAY_SIZE (opts));
2711 /* Size the string. */
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2717 for (j = 0; j < 2; j++)
2719 len += strlen (opts[i][j]);
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2726 for (i = 0; i < num; i++)
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2746 for (j = 0; j < 2; j++)
2749 memcpy (ptr, opts[i][j], len2[j]);
2751 line_len += len2[j];
2756 gcc_assert (ret + len >= ptr);
2761 /* Return TRUE if software prefetching is beneficial for the
2762 function. */
2765 software_prefetching_beneficial_p (void)
2769 case PROCESSOR_GEODE:
2771 case PROCESSOR_ATHLON:
2773 case PROCESSOR_AMDFAM10:
2781 /* Return true if profiling code should be emitted before the
2782 prologue, false otherwise.
2783 Note: for x86 with "hotfix", sorry () is issued. */
2785 ix86_profile_before_prologue (void)
2787 return flag_fentry != 0;
2790 /* Function that is callable from the debugger to print the current
2791 options. */
2793 ix86_debug_options (void)
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2801 fprintf (stderr, "%s\n\n", opts);
2805 fputs ("<no options>\n\n", stderr);
2810 /* Override various settings based on options. If MAIN_ARGS_P, the
2811 options are from the command line, otherwise they are from
2812 attribute(target). */
2815 ix86_option_override_internal (bool main_args_p)
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2833 PTA_PREFETCH_SSE = 1 << 4,
2835 PTA_3DNOW_A = 1 << 6,
2839 PTA_POPCNT = 1 << 10,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2846 PTA_PCLMUL = 1 << 17,
2849 PTA_MOVBE = 1 << 20,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2865 const processor_alias_table[] =
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
2957 /* Set up prefix/suffix so the error messages refer to either the command
2958 line argument, or the attribute(target). */
2967 prefix = "option(\"";
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990 as -mtune=generic. With native compilers we won't see
2991 -mtune=native, as it will already have been replaced by the driver. */
2992 || !strcmp (ix86_tune_string, "native"))
2995 ix86_tune_string = "generic64";
2997 ix86_tune_string = "generic32";
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3030 ix86_tune_string = "generic64";
3032 ix86_tune_string = "generic32";
3036 if (ix86_stringop_string)
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3062 ix86_arch_specified = 1;
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3076 ix86_abi = DEFAULT_ABI;
3078 if (ix86_cmodel_string != 0)
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3107 ix86_cmodel = CM_32;
3109 if (ix86_asm_string != 0)
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
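/* Worked example of the unpacking: with -march=i486, ix86_arch_mask is
   (1u << PROCESSOR_I486).  The X86_ARCH_CMOVE mask above is
   ~(m_386 | m_486 | m_PENT | m_K6), which excludes that bit, so
   ix86_arch_features[X86_ARCH_CMOVE] ends up 0 and no cmov is used.  */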
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3238 if (ix86_tune_defaulted)
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3249 error ("CPU you selected does not support x86-64 "
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs, so we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3275 /* Set the default values for switches whose default depends on TARGET_64BIT
3276 in case they weren't overwritten by command line options. */
3281 if (flag_omit_frame_pointer == 2)
3282 flag_omit_frame_pointer = 1;
3283 if (flag_asynchronous_unwind_tables == 2)
3284 flag_asynchronous_unwind_tables = 1;
3285 if (flag_pcc_struct_return == 2)
3286 flag_pcc_struct_return = 0;
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3294 if (flag_asynchronous_unwind_tables == 2)
3295 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3296 if (flag_pcc_struct_return == 2)
3297 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3301 ix86_cost = &ix86_size_cost;
3303 ix86_cost = processor_target_table[ix86_tune].cost;
3305 /* Arrange to set up i386_stack_locals for all functions. */
3306 init_machine_status = ix86_init_machine_status;
3308 /* Validate -mregparm= value. */
3309 if (ix86_regparm_string)
3312 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3313 i = atoi (ix86_regparm_string);
3314 if (i < 0 || i > REGPARM_MAX)
3315 error ("%sregparm=%d%s is not between 0 and %d",
3316 prefix, i, suffix, REGPARM_MAX);
3321 ix86_regparm = REGPARM_MAX;
3323 /* If the user has provided any of the -malign-* options,
3324 warn and use that value only if -falign-* is not set.
3325 Remove this code in GCC 3.2 or later. */
3326 if (ix86_align_loops_string)
3328 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3329 prefix, suffix, suffix);
3330 if (align_loops == 0)
3332 i = atoi (ix86_align_loops_string);
3333 if (i < 0 || i > MAX_CODE_ALIGN)
3334 error ("%salign-loops=%d%s is not between 0 and %d",
3335 prefix, i, suffix, MAX_CODE_ALIGN);
3337 align_loops = 1 << i;
3341 if (ix86_align_jumps_string)
3343 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3344 prefix, suffix, suffix);
3345 if (align_jumps == 0)
3347 i = atoi (ix86_align_jumps_string);
3348 if (i < 0 || i > MAX_CODE_ALIGN)
3349 error ("%salign-loops=%d%s is not between 0 and %d",
3350 prefix, i, suffix, MAX_CODE_ALIGN);
3352 align_jumps = 1 << i;
3356 if (ix86_align_funcs_string)
3358 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3359 prefix, suffix, suffix);
3360 if (align_functions == 0)
3362 i = atoi (ix86_align_funcs_string);
3363 if (i < 0 || i > MAX_CODE_ALIGN)
3364 error ("%salign-loops=%d%s is not between 0 and %d",
3365 prefix, i, suffix, MAX_CODE_ALIGN);
3367 align_functions = 1 << i;
3371 /* Default align_* from the processor table. */
3372 if (align_loops == 0)
3374 align_loops = processor_target_table[ix86_tune].align_loop;
3375 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3377 if (align_jumps == 0)
3379 align_jumps = processor_target_table[ix86_tune].align_jump;
3380 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3382 if (align_functions == 0)
3384 align_functions = processor_target_table[ix86_tune].align_func;
3387 /* Validate -mbranch-cost= value, or provide default. */
3388 ix86_branch_cost = ix86_cost->branch_cost;
3389 if (ix86_branch_cost_string)
3391 i = atoi (ix86_branch_cost_string);
3393 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3395 ix86_branch_cost = i;
3397 if (ix86_section_threshold_string)
3399 i = atoi (ix86_section_threshold_string);
3401 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3403 ix86_section_threshold = i;
3406 if (ix86_tls_dialect_string)
3408 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU;
3410 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU2;
3413 error ("bad value (%s) for %stls-dialect=%s %s",
3414 ix86_tls_dialect_string, prefix, suffix, sw);
3417 if (ix87_precision_string)
3419 i = atoi (ix87_precision_string);
3420 if (i != 32 && i != 64 && i != 80)
3421 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3426 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3428 /* Enable by default the SSE and MMX builtins. Do allow the user to
3429 explicitly disable any of these. In particular, disabling SSE and
3430 MMX for kernel code is extremely useful. */
3431 if (!ix86_arch_specified)
3433 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3434 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3437 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3441 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3443 if (!ix86_arch_specified)
3445 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3447 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3448 when the programmer takes care to keep the stack from being destroyed. */
3449 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3450 target_flags |= MASK_NO_RED_ZONE;
3453 /* Keep nonleaf frame pointers. */
3454 if (flag_omit_frame_pointer)
3455 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3456 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3457 flag_omit_frame_pointer = 1;
3459 /* If we're doing fast math, we don't care about comparison order
3460 wrt NaNs. This lets us use a shorter comparison sequence. */
3461 if (flag_finite_math_only)
3462 target_flags &= ~MASK_IEEE_FP;
3464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3465 since the insns won't need emulation. */
3466 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3467 target_flags &= ~MASK_NO_FANCY_MATH_387;
3469 /* Likewise, if the target doesn't have a 387, or we've specified
3470 software floating point, don't use 387 inline intrinsics. */
3472 target_flags |= MASK_NO_FANCY_MATH_387;
3474 /* Turn on MMX builtins for -msse. */
3477 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3478 x86_prefetch_sse = true;
3481 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3482 if (TARGET_SSE4_2 || TARGET_ABM)
3483 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3485 /* Validate -mpreferred-stack-boundary= value or default it to
3486 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3487 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3488 if (ix86_preferred_stack_boundary_string)
3490 i = atoi (ix86_preferred_stack_boundary_string);
3491 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3492 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3493 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3495 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
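/* For example, -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte
   boundary.  */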
3498 /* Set the default value for -mstackrealign. */
3499 if (ix86_force_align_arg_pointer == -1)
3500 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3502 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3504 /* Validate -mincoming-stack-boundary= value or default it to
3505 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3506 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3507 if (ix86_incoming_stack_boundary_string)
3509 i = atoi (ix86_incoming_stack_boundary_string);
3510 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3511 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3512 i, TARGET_64BIT ? 4 : 2);
3515 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3516 ix86_incoming_stack_boundary
3517 = ix86_user_incoming_stack_boundary;
3521 /* Accept -msseregparm only if at least SSE support is enabled. */
3522 if (TARGET_SSEREGPARM
3524 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3526 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3527 if (ix86_fpmath_string != 0)
3529 if (! strcmp (ix86_fpmath_string, "387"))
3530 ix86_fpmath = FPMATH_387;
3531 else if (! strcmp (ix86_fpmath_string, "sse"))
3535 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3536 ix86_fpmath = FPMATH_387;
3539 ix86_fpmath = FPMATH_SSE;
3541 else if (! strcmp (ix86_fpmath_string, "387,sse")
3542 || ! strcmp (ix86_fpmath_string, "387+sse")
3543 || ! strcmp (ix86_fpmath_string, "sse,387")
3544 || ! strcmp (ix86_fpmath_string, "sse+387")
3545 || ! strcmp (ix86_fpmath_string, "both"))
3549 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3550 ix86_fpmath = FPMATH_387;
3552 else if (!TARGET_80387)
3554 warning (0, "387 instruction set disabled, using SSE arithmetic");
3555 ix86_fpmath = FPMATH_SSE;
3558 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3561 error ("bad value (%s) for %sfpmath=%s %s",
3562 ix86_fpmath_string, prefix, suffix, sw);
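/* Summarizing the cases above: -mfpmath=387 selects FPMATH_387,
   -mfpmath=sse selects FPMATH_SSE (falling back to 387 arithmetic
   when SSE is disabled), and the combined spellings such as
   -mfpmath=sse,387 or -mfpmath=both select FPMATH_SSE | FPMATH_387,
   falling back to whichever unit is actually available.  */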
3565 /* If the i387 is disabled, then do not return values in it. */
3567 target_flags &= ~MASK_FLOAT_RETURNS;
3569 /* Use external vectorized library in vectorizing intrinsics. */
3570 if (ix86_veclibabi_string)
3572 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_svml;
3574 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_acml;
3577 error ("unknown vectorization library ABI type (%s) for "
3578 "%sveclibabi=%s %s", ix86_veclibabi_string,
3579 prefix, suffix, sw);
3582 if ((!USE_IX86_FRAME_POINTER
3583 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3584 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3588 /* ??? Unwind info is not correct around the CFG unless either a frame
3589 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3590 unwind info generation to be aware of the CFG and propagating states around edges. */
3592 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3593 || flag_exceptions || flag_non_call_exceptions)
3594 && flag_omit_frame_pointer
3595 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3597 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3598 warning (0, "unwind tables currently require either a frame pointer "
3599 "or %saccumulate-outgoing-args%s for correctness",
3601 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3604 /* If stack probes are required, the space used for large function
3605 arguments on the stack must also be probed, so enable
3606 -maccumulate-outgoing-args so this happens in the prologue. */
3607 if (TARGET_STACK_PROBE
3608 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3610 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3611 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3612 "for correctness", prefix, suffix);
3613 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3616 /* For sane SSE instruction set generation we need the fcomi instruction.
3617 It is safe to enable all CMOVE instructions. */
3621 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3624 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3625 p = strchr (internal_label_prefix, 'X');
3626 internal_label_prefix_len = p - internal_label_prefix;
3630 /* When the scheduling description is not available, disable the scheduler
3631 pass so it won't slow down compilation and make x87 code slower. */
3632 if (!TARGET_SCHEDULE)
3633 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3635 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3636 ix86_cost->simultaneous_prefetches,
3637 global_options.x_param_values,
3638 global_options_set.x_param_values);
3639 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3640 global_options.x_param_values,
3641 global_options_set.x_param_values);
3642 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3643 global_options.x_param_values,
3644 global_options_set.x_param_values);
3645 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3646 global_options.x_param_values,
3647 global_options_set.x_param_values);
3649 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
3650 if (flag_prefetch_loop_arrays < 0
3653 && software_prefetching_beneficial_p ())
3654 flag_prefetch_loop_arrays = 1;
3656 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3657 can be optimized to ap = __builtin_next_arg (0). */
3658 if (!TARGET_64BIT && !flag_split_stack)
3659 targetm.expand_builtin_va_start = NULL;
3663 ix86_gen_leave = gen_leave_rex64;
3664 ix86_gen_add3 = gen_adddi3;
3665 ix86_gen_sub3 = gen_subdi3;
3666 ix86_gen_sub3_carry = gen_subdi3_carry;
3667 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3668 ix86_gen_monitor = gen_sse3_monitor64;
3669 ix86_gen_andsp = gen_anddi3;
3670 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3671 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3672 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3676 ix86_gen_leave = gen_leave;
3677 ix86_gen_add3 = gen_addsi3;
3678 ix86_gen_sub3 = gen_subsi3;
3679 ix86_gen_sub3_carry = gen_subsi3_carry;
3680 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3681 ix86_gen_monitor = gen_sse3_monitor;
3682 ix86_gen_andsp = gen_andsi3;
3683 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3684 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3685 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3689 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3691 target_flags |= MASK_CLD & ~target_flags_explicit;
3694 if (!TARGET_64BIT && flag_pic)
3696 if (flag_fentry > 0)
3697 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3700 if (flag_fentry < 0)
3702 #if defined(PROFILE_BEFORE_PROLOGUE)
3709 /* Save the initial options in case the user uses function-specific options. */
3711 target_option_default_node = target_option_current_node
3712 = build_target_option_node ();
3715 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3718 ix86_option_override (void)
3720 ix86_option_override_internal (true);
3723 /* Update register usage after having seen the compiler flags. */
3726 ix86_conditional_register_usage (void)
3731 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3733 if (fixed_regs[i] > 1)
3734 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3735 if (call_used_regs[i] > 1)
3736 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3739 /* The PIC register, if it exists, is fixed. */
3740 j = PIC_OFFSET_TABLE_REGNUM;
3741 if (j != INVALID_REGNUM)
3742 fixed_regs[j] = call_used_regs[j] = 1;
3744 /* The MS_ABI changes the set of call-used registers. */
3745 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3747 call_used_regs[SI_REG] = 0;
3748 call_used_regs[DI_REG] = 0;
3749 call_used_regs[XMM6_REG] = 0;
3750 call_used_regs[XMM7_REG] = 0;
3751 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3752 call_used_regs[i] = 0;
3755 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3756 other call-clobbered regs for 64-bit. */
3759 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3761 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3762 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3763 && call_used_regs[i])
3764 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3767 /* If MMX is disabled, squash the registers. */
3769 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3770 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3771 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3773 /* If SSE is disabled, squash the registers. */
3775 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3776 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3777 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3779 /* If the FPU is disabled, squash the registers. */
3780 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3781 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3782 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3783 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3785 /* If 32-bit, squash the 64-bit registers. */
3788 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3790 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3796 /* Save the current options */
3799 ix86_function_specific_save (struct cl_target_option *ptr)
3801 ptr->arch = ix86_arch;
3802 ptr->schedule = ix86_schedule;
3803 ptr->tune = ix86_tune;
3804 ptr->fpmath = ix86_fpmath;
3805 ptr->branch_cost = ix86_branch_cost;
3806 ptr->tune_defaulted = ix86_tune_defaulted;
3807 ptr->arch_specified = ix86_arch_specified;
3808 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3809 ptr->ix86_target_flags_explicit = target_flags_explicit;
3811 /* The fields are char but the variables are not; make sure the
3812 values fit in the fields. */
3813 gcc_assert (ptr->arch == ix86_arch);
3814 gcc_assert (ptr->schedule == ix86_schedule);
3815 gcc_assert (ptr->tune == ix86_tune);
3816 gcc_assert (ptr->fpmath == ix86_fpmath);
3817 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3820 /* Restore the current options */
3823 ix86_function_specific_restore (struct cl_target_option *ptr)
3825 enum processor_type old_tune = ix86_tune;
3826 enum processor_type old_arch = ix86_arch;
3827 unsigned int ix86_arch_mask, ix86_tune_mask;
3830 ix86_arch = (enum processor_type) ptr->arch;
3831 ix86_schedule = (enum attr_cpu) ptr->schedule;
3832 ix86_tune = (enum processor_type) ptr->tune;
3833 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3834 ix86_branch_cost = ptr->branch_cost;
3835 ix86_tune_defaulted = ptr->tune_defaulted;
3836 ix86_arch_specified = ptr->arch_specified;
3837 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3838 target_flags_explicit = ptr->ix86_target_flags_explicit;
3840 /* Recreate the arch feature tests if the arch changed */
3841 if (old_arch != ix86_arch)
3843 ix86_arch_mask = 1u << ix86_arch;
3844 for (i = 0; i < X86_ARCH_LAST; ++i)
3845 ix86_arch_features[i]
3846 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3849 /* Recreate the tune optimization tests */
3850 if (old_tune != ix86_tune)
3852 ix86_tune_mask = 1u << ix86_tune;
3853 for (i = 0; i < X86_TUNE_LAST; ++i)
3854 ix86_tune_features[i]
3855 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3859 /* Print the current options */
3862 ix86_function_specific_print (FILE *file, int indent,
3863 struct cl_target_option *ptr)
3866 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3867 NULL, NULL, NULL, false);
3869 fprintf (file, "%*sarch = %d (%s)\n",
3872 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3873 ? cpu_names[ptr->arch]
3876 fprintf (file, "%*stune = %d (%s)\n",
3879 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3880 ? cpu_names[ptr->tune]
3883 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3884 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3885 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3886 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3890 fprintf (file, "%*s%s\n", indent, "", target_string);
3891 free (target_string);
3896 /* Inner function to process the attribute((target(...))): take an argument and
3897 set the current options from the argument. If we have a list, recursively go over the list. */
3901 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3906 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3907 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3908 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3909 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
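/* For example, IX86_ATTR_ISA ("sse", OPT_msse) expands to
   { "sse", 3, ix86_opt_isa, OPT_msse, 0 }, where 3 is
   sizeof ("sse") - 1, the length without the terminating NUL.  */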
3924 enum ix86_opt_type type;
3929 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3930 IX86_ATTR_ISA ("abm", OPT_mabm),
3931 IX86_ATTR_ISA ("aes", OPT_maes),
3932 IX86_ATTR_ISA ("avx", OPT_mavx),
3933 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3934 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3935 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3936 IX86_ATTR_ISA ("sse", OPT_msse),
3937 IX86_ATTR_ISA ("sse2", OPT_msse2),
3938 IX86_ATTR_ISA ("sse3", OPT_msse3),
3939 IX86_ATTR_ISA ("sse4", OPT_msse4),
3940 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3941 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3942 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3943 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3944 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3945 IX86_ATTR_ISA ("xop", OPT_mxop),
3946 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3947 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3948 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3949 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3951 /* string options */
3952 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3953 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3954 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3957 IX86_ATTR_YES ("cld",
3961 IX86_ATTR_NO ("fancy-math-387",
3962 OPT_mfancy_math_387,
3963 MASK_NO_FANCY_MATH_387),
3965 IX86_ATTR_YES ("ieee-fp",
3969 IX86_ATTR_YES ("inline-all-stringops",
3970 OPT_minline_all_stringops,
3971 MASK_INLINE_ALL_STRINGOPS),
3973 IX86_ATTR_YES ("inline-stringops-dynamically",
3974 OPT_minline_stringops_dynamically,
3975 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3977 IX86_ATTR_NO ("align-stringops",
3978 OPT_mno_align_stringops,
3979 MASK_NO_ALIGN_STRINGOPS),
3981 IX86_ATTR_YES ("recip",
3987 /* If this is a list, recurse to get the options. */
3988 if (TREE_CODE (args) == TREE_LIST)
3992 for (; args; args = TREE_CHAIN (args))
3993 if (TREE_VALUE (args)
3994 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4000 else if (TREE_CODE (args) != STRING_CST)
4003 /* Handle multiple arguments separated by commas. */
4004 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4006 while (next_optstr && *next_optstr != '\0')
4008 char *p = next_optstr;
4010 char *comma = strchr (next_optstr, ',');
4011 const char *opt_string;
4012 size_t len, opt_len;
4017 enum ix86_opt_type type = ix86_opt_unknown;
4023 len = comma - next_optstr;
4024 next_optstr = comma + 1;
4032 /* Recognize no-xxx. */
4033 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4042 /* Find the option. */
4045 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4047 type = attrs[i].type;
4048 opt_len = attrs[i].len;
4049 if (ch == attrs[i].string[0]
4050 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4051 && memcmp (p, attrs[i].string, opt_len) == 0)
4054 mask = attrs[i].mask;
4055 opt_string = attrs[i].string;
4060 /* Process the option. */
4063 error ("attribute(target(\"%s\")) is unknown", orig_p);
4067 else if (type == ix86_opt_isa)
4068 ix86_handle_option (opt, p, opt_set_p);
4070 else if (type == ix86_opt_yes || type == ix86_opt_no)
4072 if (type == ix86_opt_no)
4073 opt_set_p = !opt_set_p;
4076 target_flags |= mask;
4078 target_flags &= ~mask;
4081 else if (type == ix86_opt_str)
4085 error ("option(\"%s\") was already specified", opt_string);
4089 p_strings[opt] = xstrdup (p + opt_len);
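/* As an illustration of the parsing above, a declaration such as

     int foo (void) __attribute__ ((target ("arch=core2,sse4.1,no-fancy-math-387")));

   (foo being a hypothetical function) is split at the commas:
   "arch=core2" is recorded as a string option, "sse4.1" enables
   OPTION_MASK_ISA_SSE4_1 via ix86_handle_option, and the "no-"
   prefix inverts "fancy-math-387", which here means setting
   MASK_NO_FANCY_MATH_387 in target_flags.  */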
4099 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4102 ix86_valid_target_attribute_tree (tree args)
4104 const char *orig_arch_string = ix86_arch_string;
4105 const char *orig_tune_string = ix86_tune_string;
4106 const char *orig_fpmath_string = ix86_fpmath_string;
4107 int orig_tune_defaulted = ix86_tune_defaulted;
4108 int orig_arch_specified = ix86_arch_specified;
4109 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4112 struct cl_target_option *def
4113 = TREE_TARGET_OPTION (target_option_default_node);
4115 /* Process each of the options on the chain. */
4116 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4119 /* If the changed options are different from the default, rerun
4120 ix86_option_override_internal, and then save the options away.
4121 The string options are attribute options, and will be undone
4122 when we copy the save structure. */
4123 if (ix86_isa_flags != def->x_ix86_isa_flags
4124 || target_flags != def->x_target_flags
4125 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4126 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4127 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4129 /* If we are using the default tune= or arch=, undo the string assigned,
4130 and use the default. */
4131 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4132 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4133 else if (!orig_arch_specified)
4134 ix86_arch_string = NULL;
4136 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4137 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4138 else if (orig_tune_defaulted)
4139 ix86_tune_string = NULL;
4141 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4142 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4143 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4144 else if (!TARGET_64BIT && TARGET_SSE)
4145 ix86_fpmath_string = "sse,387";
4147 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4148 ix86_option_override_internal (false);
4150 /* Add any builtin functions with the new isa if any. */
4151 ix86_add_new_builtins (ix86_isa_flags);
4153 /* Save the current options unless we are validating options for
4155 t = build_target_option_node ();
4157 ix86_arch_string = orig_arch_string;
4158 ix86_tune_string = orig_tune_string;
4159 ix86_fpmath_string = orig_fpmath_string;
4161 /* Free up memory allocated to hold the strings */
4162 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4163 if (option_strings[i])
4164 free (option_strings[i]);
4170 /* Hook to validate attribute((target("string"))). */
4173 ix86_valid_target_attribute_p (tree fndecl,
4174 tree ARG_UNUSED (name),
4176 int ARG_UNUSED (flags))
4178 struct cl_target_option cur_target;
4180 tree old_optimize = build_optimization_node ();
4181 tree new_target, new_optimize;
4182 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4184 /* If the function changed the optimization levels as well as setting target
4185 options, start with the optimizations specified. */
4186 if (func_optimize && func_optimize != old_optimize)
4187 cl_optimization_restore (&global_options,
4188 TREE_OPTIMIZATION (func_optimize));
4190 /* The target attributes may also change some optimization flags, so update
4191 the optimization options if necessary. */
4192 cl_target_option_save (&cur_target, &global_options);
4193 new_target = ix86_valid_target_attribute_tree (args);
4194 new_optimize = build_optimization_node ();
4201 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4203 if (old_optimize != new_optimize)
4204 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4207 cl_target_option_restore (&global_options, &cur_target);
4209 if (old_optimize != new_optimize)
4210 cl_optimization_restore (&global_options,
4211 TREE_OPTIMIZATION (old_optimize));
4217 /* Hook to determine if one function can safely inline another. */
4220 ix86_can_inline_p (tree caller, tree callee)
4223 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4224 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4226 /* If callee has no option attributes, then it is ok to inline. */
4230 /* If caller has no option attributes but callee does, then it is not ok to inline. */
4232 else if (!caller_tree)
4237 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4238 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4240 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4241 function can inline an SSE2 function, but an SSE2 function can't inline an SSE4 function. */
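/* A worked example of the subset test below: a caller compiled with
   SSE, SSE2 and SSE4.2 enabled may inline a callee compiled with only
   SSE and SSE2, since ANDing the two flag sets gives back the
   callee's flags; with the roles swapped the test fails and the
   inlining is rejected.  */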
4243 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4244 != callee_opts->x_ix86_isa_flags)
4247 /* See if we have the same non-isa options. */
4248 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4251 /* See if arch, tune, etc. are the same. */
4252 else if (caller_opts->arch != callee_opts->arch)
4255 else if (caller_opts->tune != callee_opts->tune)
4258 else if (caller_opts->fpmath != callee_opts->fpmath)
4261 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4272 /* Remember the last target of ix86_set_current_function. */
4273 static GTY(()) tree ix86_previous_fndecl;
4275 /* Establish appropriate back-end context for processing the function
4276 FNDECL. The argument might be NULL to indicate processing at top
4277 level, outside of any function scope. */
4279 ix86_set_current_function (tree fndecl)
4281 /* Only change the context if the function changes. This hook is called
4282 several times in the course of compiling a function, and we don't want to
4283 slow things down too much or call target_reinit when it isn't safe. */
4284 if (fndecl && fndecl != ix86_previous_fndecl)
4286 tree old_tree = (ix86_previous_fndecl
4287 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4290 tree new_tree = (fndecl
4291 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4294 ix86_previous_fndecl = fndecl;
4295 if (old_tree == new_tree)
4300 cl_target_option_restore (&global_options,
4301 TREE_TARGET_OPTION (new_tree));
4307 struct cl_target_option *def
4308 = TREE_TARGET_OPTION (target_option_current_node);
4310 cl_target_option_restore (&global_options, def);
4317 /* Return true if this goes in large data/bss. */
4320 ix86_in_large_data_p (tree exp)
4322 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4325 /* Functions are never large data. */
4326 if (TREE_CODE (exp) == FUNCTION_DECL)
4329 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4331 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4332 if (strcmp (section, ".ldata") == 0
4333 || strcmp (section, ".lbss") == 0)
4339 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4341 /* If this is an incomplete type with size 0, then we can't put it
4342 in data because it might be too big when completed. */
4343 if (!size || size > ix86_section_threshold)
4350 /* Switch to the appropriate section for output of DECL.
4351 DECL is either a `VAR_DECL' node or a constant of some sort.
4352 RELOC indicates whether forming the initial value of DECL requires
4353 link-time relocations. */
4355 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4359 x86_64_elf_select_section (tree decl, int reloc,
4360 unsigned HOST_WIDE_INT align)
4362 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4363 && ix86_in_large_data_p (decl))
4365 const char *sname = NULL;
4366 unsigned int flags = SECTION_WRITE;
4367 switch (categorize_decl_for_section (decl, reloc))
4372 case SECCAT_DATA_REL:
4373 sname = ".ldata.rel";
4375 case SECCAT_DATA_REL_LOCAL:
4376 sname = ".ldata.rel.local";
4378 case SECCAT_DATA_REL_RO:
4379 sname = ".ldata.rel.ro";
4381 case SECCAT_DATA_REL_RO_LOCAL:
4382 sname = ".ldata.rel.ro.local";
4386 flags |= SECTION_BSS;
4389 case SECCAT_RODATA_MERGE_STR:
4390 case SECCAT_RODATA_MERGE_STR_INIT:
4391 case SECCAT_RODATA_MERGE_CONST:
4395 case SECCAT_SRODATA:
4402 /* We don't split these for the medium model. Place them into
4403 default sections and hope for the best. */
4408 /* We might get called with string constants, but get_named_section
4409 doesn't like them as they are not DECLs. Also, we need to set
4410 flags in that case. */
4412 return get_section (sname, flags, NULL);
4413 return get_named_section (decl, sname, reloc);
4416 return default_elf_select_section (decl, reloc, align);
4419 /* Build up a unique section name, expressed as a
4420 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4421 RELOC indicates whether the initial value of EXP requires
4422 link-time relocations. */
4424 static void ATTRIBUTE_UNUSED
4425 x86_64_elf_unique_section (tree decl, int reloc)
4427 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4428 && ix86_in_large_data_p (decl))
4430 const char *prefix = NULL;
4431 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4432 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4434 switch (categorize_decl_for_section (decl, reloc))
4437 case SECCAT_DATA_REL:
4438 case SECCAT_DATA_REL_LOCAL:
4439 case SECCAT_DATA_REL_RO:
4440 case SECCAT_DATA_REL_RO_LOCAL:
4441 prefix = one_only ? ".ld" : ".ldata";
4444 prefix = one_only ? ".lb" : ".lbss";
4447 case SECCAT_RODATA_MERGE_STR:
4448 case SECCAT_RODATA_MERGE_STR_INIT:
4449 case SECCAT_RODATA_MERGE_CONST:
4450 prefix = one_only ? ".lr" : ".lrodata";
4452 case SECCAT_SRODATA:
4459 /* We don't split these for the medium model. Place them into
4460 default sections and hope for the best. */
4465 const char *name, *linkonce;
4468 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4469 name = targetm.strip_name_encoding (name);
4471 /* If we're using one_only, then there needs to be a .gnu.linkonce
4472 prefix to the section name. */
4473 linkonce = one_only ? ".gnu.linkonce" : "";
4475 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4477 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4481 default_unique_section (decl, reloc);
4484 #ifdef COMMON_ASM_OP
4485 /* This says how to output assembler code to declare an
4486 uninitialized external linkage data object.
4488 For medium model x86-64 we need to use the .largecomm directive for large objects. */
4491 x86_elf_aligned_common (FILE *file,
4492 const char *name, unsigned HOST_WIDE_INT size,
4495 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4496 && size > (unsigned int)ix86_section_threshold)
4497 fputs (".largecomm\t", file);
4499 fputs (COMMON_ASM_OP, file);
4500 assemble_name (file, name);
4501 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4502 size, align / BITS_PER_UNIT);
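/* For a hypothetical 4 MB, 32-byte aligned object "big_table" in the
   medium model this emits

     .largecomm big_table,4194304,32

   while objects below the threshold keep using COMMON_ASM_OP.  */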
4506 /* Utility function for targets to use in implementing
4507 ASM_OUTPUT_ALIGNED_BSS. */
4510 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4511 const char *name, unsigned HOST_WIDE_INT size,
4514 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4515 && size > (unsigned int)ix86_section_threshold)
4516 switch_to_section (get_named_section (decl, ".lbss", 0));
4518 switch_to_section (bss_section);
4519 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4520 #ifdef ASM_DECLARE_OBJECT_NAME
4521 last_assemble_variable_decl = decl;
4522 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4524 /* The standard thing is to just output a label for the object. */
4525 ASM_OUTPUT_LABEL (file, name);
4526 #endif /* ASM_DECLARE_OBJECT_NAME */
4527 ASM_OUTPUT_SKIP (file, size ? size : 1);
4531 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4533 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4534 make the problem with not enough registers even worse. */
4535 #ifdef INSN_SCHEDULING
4537 flag_schedule_insns = 0;
4541 /* The Darwin libraries never set errno, so we might as well
4542 avoid calling them when that's the only reason we would. */
4543 flag_errno_math = 0;
4545 /* The default values of these switches depend on TARGET_64BIT, which is
4546 not known at this point. Mark these values with 2 and let the user
4547 override them. If there is no command line option specifying them,
4548 we will set the defaults in
4549 ix86_option_override_internal. */
4551 flag_omit_frame_pointer = 2;
4553 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4557 flag_pcc_struct_return = 2;
4558 flag_asynchronous_unwind_tables = 2;
4559 flag_vect_cost_model = 1;
4560 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4561 SUBTARGET_OPTIMIZATION_OPTIONS;
4565 /* Decide whether we must probe the stack before any space allocation
4566 on this target. It's essentially TARGET_STACK_PROBE except when
4567 -fstack-check causes the stack to be already probed differently. */
4570 ix86_target_stack_probe (void)
4572 /* Do not probe the stack twice if static stack checking is enabled. */
4573 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4576 return TARGET_STACK_PROBE;
4579 /* Decide whether we can make a sibling call to a function. DECL is the
4580 declaration of the function being targeted by the call and EXP is the
4581 CALL_EXPR representing the call. */
4584 ix86_function_ok_for_sibcall (tree decl, tree exp)
4586 tree type, decl_or_type;
4589 /* If we are generating position-independent code, we cannot sibcall
4590 optimize any indirect call, or a direct call to a global function,
4591 as the PLT requires %ebx be live. */
4592 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4595 /* If we need to align the outgoing stack, then sibcalling would
4596 unalign the stack, which may break the called function. */
4597 if (ix86_minimum_incoming_stack_boundary (true)
4598 < PREFERRED_STACK_BOUNDARY)
4603 decl_or_type = decl;
4604 type = TREE_TYPE (decl);
4608 /* We're looking at the CALL_EXPR, we need the type of the function. */
4609 type = CALL_EXPR_FN (exp); /* pointer expression */
4610 type = TREE_TYPE (type); /* pointer type */
4611 type = TREE_TYPE (type); /* function type */
4612 decl_or_type = type;
4615 /* Check that the return value locations are the same. For example,
4616 if we are returning floats on the 80387 register stack, we cannot
4617 make a sibcall from a function that doesn't return a float to a
4618 function that does or, conversely, from a function that does return
4619 a float to a function that doesn't; the necessary stack adjustment
4620 would not be executed. This is also the place we notice
4621 differences in the return value ABI. Note that it is ok for one
4622 of the functions to have void return type as long as the return
4623 value of the other is passed in a register. */
4624 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4625 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4627 if (STACK_REG_P (a) || STACK_REG_P (b))
4629 if (!rtx_equal_p (a, b))
4632 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4634 else if (!rtx_equal_p (a, b))
4639 /* The SYSV ABI has more call-clobbered registers;
4640 disallow sibcalls from MS to SYSV. */
4641 if (cfun->machine->call_abi == MS_ABI
4642 && ix86_function_type_abi (type) == SYSV_ABI)
4647 /* If this call is indirect, we'll need to be able to use a
4648 call-clobbered register for the address of the target function.
4649 Make sure that all such registers are not used for passing
4650 parameters. Note that DLLIMPORT functions are indirect. */
4652 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4654 if (ix86_function_regparm (type, NULL) >= 3)
4656 /* ??? Need to count the actual number of registers to be used,
4657 not the possible number of registers. Fix later. */
4663 /* Otherwise okay. That also includes certain types of indirect calls. */
4667 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4668 and "sseregparm" calling convention attributes;
4669 arguments as in struct attribute_spec.handler. */
4672 ix86_handle_cconv_attribute (tree *node, tree name,
4674 int flags ATTRIBUTE_UNUSED,
4677 if (TREE_CODE (*node) != FUNCTION_TYPE
4678 && TREE_CODE (*node) != METHOD_TYPE
4679 && TREE_CODE (*node) != FIELD_DECL
4680 && TREE_CODE (*node) != TYPE_DECL)
4682 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4684 *no_add_attrs = true;
4688 /* Can combine regparm with all attributes but fastcall. */
4689 if (is_attribute_p ("regparm", name))
4693 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4695 error ("fastcall and regparm attributes are not compatible");
4698 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4700 error ("regparm and thiscall attributes are not compatible");
4703 cst = TREE_VALUE (args);
4704 if (TREE_CODE (cst) != INTEGER_CST)
4706 warning (OPT_Wattributes,
4707 "%qE attribute requires an integer constant argument",
4709 *no_add_attrs = true;
4711 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4713 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4715 *no_add_attrs = true;
4723 /* Do not warn when emulating the MS ABI. */
4724 if ((TREE_CODE (*node) != FUNCTION_TYPE
4725 && TREE_CODE (*node) != METHOD_TYPE)
4726 || ix86_function_type_abi (*node) != MS_ABI)
4727 warning (OPT_Wattributes, "%qE attribute ignored",
4729 *no_add_attrs = true;
4733 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4734 if (is_attribute_p ("fastcall", name))
4736 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4738 error ("fastcall and cdecl attributes are not compatible");
4740 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4742 error ("fastcall and stdcall attributes are not compatible");
4744 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4746 error ("fastcall and regparm attributes are not compatible");
4748 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4750 error ("fastcall and thiscall attributes are not compatible");
4754 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4756 else if (is_attribute_p ("stdcall", name))
4758 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4760 error ("stdcall and cdecl attributes are not compatible");
4762 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4764 error ("stdcall and fastcall attributes are not compatible");
4766 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4768 error ("stdcall and thiscall attributes are not compatible");
4772 /* Can combine cdecl with regparm and sseregparm. */
4773 else if (is_attribute_p ("cdecl", name))
4775 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4777 error ("stdcall and cdecl attributes are not compatible");
4779 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4781 error ("fastcall and cdecl attributes are not compatible");
4783 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4785 error ("cdecl and thiscall attributes are not compatible");
4788 else if (is_attribute_p ("thiscall", name))
4790 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4791 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4793 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4795 error ("stdcall and thiscall attributes are not compatible");
4797 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4799 error ("fastcall and thiscall attributes are not compatible");
4801 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4803 error ("cdecl and thiscall attributes are not compatible");
4807 /* Can combine sseregparm with all attributes. */
4812 /* Return 0 if the attributes for two types are incompatible, 1 if they
4813 are compatible, and 2 if they are nearly compatible (which causes a
4814 warning to be generated). */
4817 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4819 /* Check for mismatch of non-default calling convention. */
4820 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4822 if (TREE_CODE (type1) != FUNCTION_TYPE
4823 && TREE_CODE (type1) != METHOD_TYPE)
4826 /* Check for mismatched fastcall/regparm types. */
4827 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4828 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4829 || (ix86_function_regparm (type1, NULL)
4830 != ix86_function_regparm (type2, NULL)))
4833 /* Check for mismatched sseregparm types. */
4834 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4835 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4838 /* Check for mismatched thiscall types. */
4839 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4840 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4843 /* Check for mismatched return types (cdecl vs stdcall). */
4844 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4845 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4851 /* Return the regparm value for a function with the indicated TYPE and DECL.
4852 DECL may be NULL when calling function indirectly
4853 or considering a libcall. */
4856 ix86_function_regparm (const_tree type, const_tree decl)
4862 return (ix86_function_type_abi (type) == SYSV_ABI
4863 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4865 regparm = ix86_regparm;
4866 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4869 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4873 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4876 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4879 /* Use register calling convention for local functions when possible. */
4881 && TREE_CODE (decl) == FUNCTION_DECL
4883 && !(profile_flag && !flag_fentry))
4885 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4886 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4889 int local_regparm, globals = 0, regno;
4891 /* Make sure no regparm register is taken by a
4892 fixed register variable. */
4893 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4894 if (fixed_regs[local_regparm])
4897 /* We don't want to use regparm(3) for nested functions as
4898 these use a static chain pointer in the third argument. */
4899 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4902 /* In 32-bit mode save a register for the split stack. */
4903 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4906 /* Each fixed register usage increases register pressure,
4907 so less registers should be used for argument passing.
4908 This functionality can be overriden by an explicit
4910 for (regno = 0; regno <= DI_REG; regno++)
4911 if (fixed_regs[regno])
4915 = globals < local_regparm ? local_regparm - globals : 0;
4917 if (local_regparm > regparm)
4918 regparm = local_regparm;
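/* For example, with local_regparm == 3 and a single fixed register
   variable occupying one of the registers up to DI_REG, globals == 1
   and the regparm count actually used drops to 3 - 1 == 2.  */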
4925 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4926 DFmode (2) arguments in SSE registers for a function with the
4927 indicated TYPE and DECL. DECL may be NULL when calling function
4928 indirectly or considering a libcall. Otherwise return 0. */
4931 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4933 gcc_assert (!TARGET_64BIT);
4935 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4936 by the sseregparm attribute. */
4937 if (TARGET_SSEREGPARM
4938 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4945 error ("calling %qD with attribute sseregparm without "
4946 "SSE/SSE2 enabled", decl);
4948 error ("calling %qT with attribute sseregparm without "
4949 "SSE/SSE2 enabled", type);
4957 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4958 (and DFmode for SSE2) arguments in SSE registers. */
4959 if (decl && TARGET_SSE_MATH && optimize
4960 && !(profile_flag && !flag_fentry))
4962 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4963 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4965 return TARGET_SSE2 ? 2 : 1;
4971 /* Return true if EAX is live at the start of the function. Used by
4972 ix86_expand_prologue to determine if we need special help before
4973 calling allocate_stack_worker. */
4976 ix86_eax_live_at_start_p (void)
4978 /* Cheat. Don't bother working forward from ix86_function_regparm
4979 to the function type to whether an actual argument is located in
4980 eax. Instead just look at cfg info, which is still close enough
4981 to correct at this point. This gives false positives for broken
4982 functions that might use uninitialized data that happens to be
4983 allocated in eax, but who cares? */
4984 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4987 /* Value is the number of bytes of arguments automatically
4988 popped when returning from a subroutine call.
4989 FUNDECL is the declaration node of the function (as a tree),
4990 FUNTYPE is the data type of the function (as a tree),
4991 or for a library call it is an identifier node for the subroutine name.
4992 SIZE is the number of bytes of arguments passed on the stack.
4994 On the 80386, the RTD insn may be used to pop them if the number
4995 of args is fixed, but if the number is variable then the caller
4996 must pop them all. RTD can't be used for library calls now
4997 because the library is compiled with the Unix compiler.
4998 Use of RTD is a selectable option, since it is incompatible with
4999 standard Unix calling sequences. If the option is not selected,
5000 the caller must always pop the args.
5002 The attribute stdcall is equivalent to RTD on a per module basis. */
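/* For example, a 32-bit function declared

     void __attribute__ ((stdcall)) f (int a, int b);

   (f being a hypothetical function) receives 8 bytes of stack
   arguments, so the callee pops them with "ret $8" and this
   function reports 8.  */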
5005 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5009 /* None of the 64-bit ABIs pop arguments. */
5013 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5015 /* Cdecl functions override -mrtd, and never pop the stack. */
5016 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5018 /* Stdcall, fastcall and thiscall functions will pop the stack if not taking variable args. */
5020 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5021 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5022 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5025 if (rtd && ! stdarg_p (funtype))
5029 /* Lose any fake structure return argument if it is passed on the stack. */
5030 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5031 && !KEEP_AGGREGATE_RETURN_POINTER)
5033 int nregs = ix86_function_regparm (funtype, fundecl);
5035 return GET_MODE_SIZE (Pmode);
5041 /* Argument support functions. */
5043 /* Return true when register may be used to pass function parameters. */
5045 ix86_function_arg_regno_p (int regno)
5048 const int *parm_regs;
5053 return (regno < REGPARM_MAX
5054 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5056 return (regno < REGPARM_MAX
5057 || (TARGET_MMX && MMX_REGNO_P (regno)
5058 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5059 || (TARGET_SSE && SSE_REGNO_P (regno)
5060 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5065 if (SSE_REGNO_P (regno) && TARGET_SSE)
5070 if (TARGET_SSE && SSE_REGNO_P (regno)
5071 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5075 /* TODO: The function should depend on the current function's ABI, but
5076 builtins.c would need updating then. Therefore we use the default ABI. */
5079 /* RAX is used as hidden argument to va_arg functions. */
5080 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5083 if (ix86_abi == MS_ABI)
5084 parm_regs = x86_64_ms_abi_int_parameter_registers;
5086 parm_regs = x86_64_int_parameter_registers;
5087 for (i = 0; i < (ix86_abi == MS_ABI
5088 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5089 if (regno == parm_regs[i])
5094 /* Return true if we do not know how to pass TYPE solely in registers. */
5097 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5099 if (must_pass_in_stack_var_size_or_pad (mode, type))
5102 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5103 The layout_type routine is crafty and tries to trick us into passing
5104 currently unsupported vector types on the stack by using TImode. */
5105 return (!TARGET_64BIT && mode == TImode
5106 && type && TREE_CODE (type) != VECTOR_TYPE);
5109 /* Return the size, in bytes, of the area reserved for arguments passed
5110 in registers for the function represented by FNDECL, depending on the ABI used. */
5113 ix86_reg_parm_stack_space (const_tree fndecl)
5115 enum calling_abi call_abi = SYSV_ABI;
5116 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5117 call_abi = ix86_function_abi (fndecl);
5119 call_abi = ix86_function_type_abi (fndecl);
5120 if (call_abi == MS_ABI)
5125 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call ABI used. */
5128 ix86_function_type_abi (const_tree fntype)
5130 if (TARGET_64BIT && fntype != NULL)
5132 enum calling_abi abi = ix86_abi;
5133 if (abi == SYSV_ABI)
5135 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5138 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5146 ix86_function_ms_hook_prologue (const_tree fn)
5148 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5150 if (decl_function_context (fn) != NULL_TREE)
5151 error_at (DECL_SOURCE_LOCATION (fn),
5152 "ms_hook_prologue is not compatible with nested function");
5159 static enum calling_abi
5160 ix86_function_abi (const_tree fndecl)
5164 return ix86_function_type_abi (TREE_TYPE (fndecl));
5167 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the call ABI used. */
5170 ix86_cfun_abi (void)
5172 if (! cfun || ! TARGET_64BIT)
5174 return cfun->machine->call_abi;
5177 /* Write the extra assembler code needed to declare a function properly. */
5180 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5183 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5187 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5188 unsigned int filler_cc = 0xcccccccc;
5190 for (i = 0; i < filler_count; i += 4)
5191 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5194 ASM_OUTPUT_LABEL (asm_out_file, fname);
5196 /* Output magic byte marker, if hot-patch attribute is set. */
5201 /* leaq [%rsp + 0], %rsp */
5202 asm_fprintf (asm_out_file, ASM_BYTE
5203 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5207 /* movl.s %edi, %edi
5209 movl.s %esp, %ebp */
5210 asm_fprintf (asm_out_file, ASM_BYTE
5211 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5217 extern void init_regs (void);
5219 /* Implementation of the call ABI switching target hook. The call register
5220 sets specific to FNDECL are set up. See also CONDITIONAL_REGISTER_USAGE
5221 for more details. */
5223 ix86_call_abi_override (const_tree fndecl)
5225 if (fndecl == NULL_TREE)
5226 cfun->machine->call_abi = ix86_abi;
5228 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5231 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
5232 expensive re-initialization via init_regs each time we switch function
5233 context, since this is needed only during RTL expansion. */
5235 ix86_maybe_switch_abi (void)
5238 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5242 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5243 for a call to a function whose data type is FNTYPE.
5244 For a library call, FNTYPE is 0. */
5247 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5248 tree fntype, /* tree ptr for function decl */
5249 rtx libname, /* SYMBOL_REF of library name or 0 */
5252 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5253 memset (cum, 0, sizeof (*cum));
5256 cum->call_abi = ix86_function_abi (fndecl);
5258 cum->call_abi = ix86_function_type_abi (fntype);
5259 /* Set up the number of registers to use for passing arguments. */
5261 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5262 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5263 "or subtarget optimization implying it");
5264 cum->nregs = ix86_regparm;
5267 cum->nregs = (cum->call_abi == SYSV_ABI
5268 ? X86_64_REGPARM_MAX
5269 : X86_64_MS_REGPARM_MAX);
5273 cum->sse_nregs = SSE_REGPARM_MAX;
5276 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5277 ? X86_64_SSE_REGPARM_MAX
5278 : X86_64_MS_SSE_REGPARM_MAX);
5282 cum->mmx_nregs = MMX_REGPARM_MAX;
5283 cum->warn_avx = true;
5284 cum->warn_sse = true;
5285 cum->warn_mmx = true;
5287 /* Because the type might mismatch between caller and callee, we need to
5288 use the actual type of the function for local calls.
5289 FIXME: cgraph_analyze can be told to actually record if a function uses
5290 va_start, so for local functions maybe_vaarg can be made more aggressive.
5292 FIXME: once the type system is fixed, we won't need this code anymore. */
5294 fntype = TREE_TYPE (fndecl);
5295 cum->maybe_vaarg = (fntype
5296 ? (!prototype_p (fntype) || stdarg_p (fntype))
5301 /* If there are variable arguments, then we won't pass anything
5302 in registers in 32-bit mode. */
5303 if (stdarg_p (fntype))
5314 /* Use ecx and edx registers if function has fastcall attribute,
5315 else look for regparm information. */
5318 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5321 cum->fastcall = 1; /* Same first register as in fastcall. */
5323 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5329 cum->nregs = ix86_function_regparm (fntype, fndecl);
5332 /* Set up the number of SSE registers used for passing SFmode
5333 and DFmode arguments. Warn for mismatching ABI. */
5334 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5338 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5339 But in the case of vector types, it is some vector mode.
5341 When we have only some of our vector isa extensions enabled, then there
5342 are some modes for which vector_mode_supported_p is false. For these
5343 modes, the generic vector support in gcc will choose some non-vector mode
5344 in order to implement the type. By computing the natural mode, we'll
5345 select the proper ABI location for the operand and not depend on whatever
5346 the middle-end decides to do with these vector types.
5348 The middle-end can't deal with vector types > 16 bytes. In this
5349 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5352 static enum machine_mode
5353 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5355 enum machine_mode mode = TYPE_MODE (type);
5357 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5359 HOST_WIDE_INT size = int_size_in_bytes (type);
5360 if ((size == 8 || size == 16 || size == 32)
5361 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5362 && TYPE_VECTOR_SUBPARTS (type) > 1)
5364 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5366 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5367 mode = MIN_MODE_VECTOR_FLOAT;
5369 mode = MIN_MODE_VECTOR_INT;
5371 /* Get the mode which has this inner mode and number of units. */
5372 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5373 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5374 && GET_MODE_INNER (mode) == innermode)
5376 if (size == 32 && !TARGET_AVX)
5378 static bool warnedavx;
5385 warning (0, "AVX vector argument without AVX "
5386 "enabled changes the ABI");
5388 return TYPE_MODE (type);
5401 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5402 this may not agree with the mode that the type system has chosen for the
5403 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5404 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5407 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5412 if (orig_mode != BLKmode)
5413 tmp = gen_rtx_REG (orig_mode, regno);
5416 tmp = gen_rtx_REG (mode, regno);
5417 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5418 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5424 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
5425 goal of this code is to classify each eightbyte of an incoming argument by
5426 register class and assign registers accordingly. */
5428 /* Return the union class of CLASS1 and CLASS2.
5429 See the x86-64 PS ABI for details. */
5431 static enum x86_64_reg_class
5432 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5434 /* Rule #1: If both classes are equal, this is the resulting class. */
5435 if (class1 == class2)
5438 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5440 if (class1 == X86_64_NO_CLASS)
5442 if (class2 == X86_64_NO_CLASS)
5445 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5446 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5447 return X86_64_MEMORY_CLASS;
5449 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5450 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5451 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5452 return X86_64_INTEGERSI_CLASS;
5453 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5454 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5455 return X86_64_INTEGER_CLASS;
5457 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5459 if (class1 == X86_64_X87_CLASS
5460 || class1 == X86_64_X87UP_CLASS
5461 || class1 == X86_64_COMPLEX_X87_CLASS
5462 || class2 == X86_64_X87_CLASS
5463 || class2 == X86_64_X87UP_CLASS
5464 || class2 == X86_64_COMPLEX_X87_CLASS)
5465 return X86_64_MEMORY_CLASS;
5467 /* Rule #6: Otherwise class SSE is used. */
5468 return X86_64_SSE_CLASS;
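/* A worked example of the rules above: for "union { int i; float f; }"
   the int member classifies as X86_64_INTEGERSI_CLASS and the float
   member as X86_64_SSESF_CLASS; rule #4 merges them to
   X86_64_INTEGERSI_CLASS, so the union is passed in an integer
   register.  */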
5471 /* Classify the argument of type TYPE and mode MODE.
5472 CLASSES will be filled by the register class used to pass each word
5473 of the operand. The number of words is returned. In case the parameter
5474 should be passed in memory, 0 is returned. As a special case for zero
5475 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5477 BIT_OFFSET is used internally for handling records; it specifies the
5478 offset in bits modulo 256 to avoid overflow cases.
5480 See the x86-64 PS ABI for details.
5484 classify_argument (enum machine_mode mode, const_tree type,
5485 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5487 HOST_WIDE_INT bytes =
5488 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5489 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
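/* For example, a 12-byte structure at bit_offset 0 occupies
   words = (12 + 0 + 8 - 1) / 8 = 2 eightbytes, UNITS_PER_WORD
   being 8 in 64-bit mode.  */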
5491 /* Variable sized entities are always passed/returned in memory. */
5495 if (mode != VOIDmode
5496 && targetm.calls.must_pass_in_stack (mode, type))
5499 if (type && AGGREGATE_TYPE_P (type))
5503 enum x86_64_reg_class subclasses[MAX_CLASSES];
5505 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5509 for (i = 0; i < words; i++)
5510 classes[i] = X86_64_NO_CLASS;
5512 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5513 signal the memory class, so handle this as a special case. */
5516 classes[0] = X86_64_NO_CLASS;
5520 /* Classify each field of record and merge classes. */
5521 switch (TREE_CODE (type))
5524 /* And now merge the fields of structure. */
5525 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5527 if (TREE_CODE (field) == FIELD_DECL)
5531 if (TREE_TYPE (field) == error_mark_node)
5534 /* Bitfields are always classified as integer. Handle them
5535 early, since later code would consider them to be
5536 misaligned integers. */
5537 if (DECL_BIT_FIELD (field))
5539 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5540 i < ((int_bit_position (field) + (bit_offset % 64))
5541 + tree_low_cst (DECL_SIZE (field), 0)
5544 merge_classes (X86_64_INTEGER_CLASS,
5551 type = TREE_TYPE (field);
/* A flexible array member is ignored. */
5554 if (TYPE_MODE (type) == BLKmode
5555 && TREE_CODE (type) == ARRAY_TYPE
5556 && TYPE_SIZE (type) == NULL_TREE
5557 && TYPE_DOMAIN (type) != NULL_TREE
5558 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5563 if (!warned && warn_psabi)
5566 inform (input_location,
5567 "The ABI of passing struct with"
5568 " a flexible array member has"
5569 " changed in GCC 4.4");
5573 num = classify_argument (TYPE_MODE (type), type,
5575 (int_bit_position (field)
5576 + bit_offset) % 256);
5579 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5580 for (i = 0; i < num && (i + pos) < words; i++)
5582 merge_classes (subclasses[i], classes[i + pos]);
5589 /* Arrays are handled as small records. */
5592 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5593 TREE_TYPE (type), subclasses, bit_offset);
5597 /* The partial classes are now full classes. */
5598 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5599 subclasses[0] = X86_64_SSE_CLASS;
5600 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5601 && !((bit_offset % 64) == 0 && bytes == 4))
5602 subclasses[0] = X86_64_INTEGER_CLASS;
5604 for (i = 0; i < words; i++)
5605 classes[i] = subclasses[i % num];
5610 case QUAL_UNION_TYPE:
/* Unions are similar to RECORD_TYPE but the offset is always 0. */
5613 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5615 if (TREE_CODE (field) == FIELD_DECL)
5619 if (TREE_TYPE (field) == error_mark_node)
5622 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5623 TREE_TYPE (field), subclasses,
5627 for (i = 0; i < num; i++)
5628 classes[i] = merge_classes (subclasses[i], classes[i]);
/* When the size exceeds 16 bytes, if the first eightbyte isn't
   X86_64_SSE_CLASS or any later one isn't X86_64_SSEUP_CLASS,
   everything should be passed in memory. */
5643 if (classes[0] != X86_64_SSE_CLASS)
5646 for (i = 1; i < words; i++)
5647 if (classes[i] != X86_64_SSEUP_CLASS)
5651 /* Final merger cleanup. */
5652 for (i = 0; i < words; i++)
/* If one class is MEMORY, everything should be passed in memory. */
5656 if (classes[i] == X86_64_MEMORY_CLASS)
/* X86_64_SSEUP_CLASS should always be preceded by
   X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5661 if (classes[i] == X86_64_SSEUP_CLASS
5662 && classes[i - 1] != X86_64_SSE_CLASS
5663 && classes[i - 1] != X86_64_SSEUP_CLASS)
5665 /* The first one should never be X86_64_SSEUP_CLASS. */
5666 gcc_assert (i != 0);
5667 classes[i] = X86_64_SSE_CLASS;
5670 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5671 everything should be passed in memory. */
5672 if (classes[i] == X86_64_X87UP_CLASS
5673 && (classes[i - 1] != X86_64_X87_CLASS))
5677 /* The first one should never be X86_64_X87UP_CLASS. */
5678 gcc_assert (i != 0);
5679 if (!warned && warn_psabi)
5682 inform (input_location,
5683 "The ABI of passing union with long double"
5684 " has changed in GCC 4.4");
/* Compute the alignment needed. We align all types to their natural
   boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5694 if (mode != VOIDmode && mode != BLKmode)
5696 int mode_alignment = GET_MODE_BITSIZE (mode);
5699 mode_alignment = 128;
5700 else if (mode == XCmode)
5701 mode_alignment = 256;
5702 if (COMPLEX_MODE_P (mode))
5703 mode_alignment /= 2;
5704 /* Misaligned fields are always returned in memory. */
5705 if (bit_offset % mode_alignment)
/* For V1xx modes, just use the base mode. */
5710 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5711 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5712 mode = GET_MODE_INNER (mode);
5714 /* Classification of atomic types. */
5719 classes[0] = X86_64_SSE_CLASS;
5722 classes[0] = X86_64_SSE_CLASS;
5723 classes[1] = X86_64_SSEUP_CLASS;
int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5737 classes[0] = X86_64_INTEGERSI_CLASS;
5740 else if (size <= 64)
5742 classes[0] = X86_64_INTEGER_CLASS;
5745 else if (size <= 64+32)
5747 classes[0] = X86_64_INTEGER_CLASS;
5748 classes[1] = X86_64_INTEGERSI_CLASS;
5751 else if (size <= 64+64)
5753 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5761 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5765 /* OImode shouldn't be used directly. */
5770 if (!(bit_offset % 64))
5771 classes[0] = X86_64_SSESF_CLASS;
5773 classes[0] = X86_64_SSE_CLASS;
5776 classes[0] = X86_64_SSEDF_CLASS;
5779 classes[0] = X86_64_X87_CLASS;
5780 classes[1] = X86_64_X87UP_CLASS;
5783 classes[0] = X86_64_SSE_CLASS;
5784 classes[1] = X86_64_SSEUP_CLASS;
5787 classes[0] = X86_64_SSE_CLASS;
5788 if (!(bit_offset % 64))
5794 if (!warned && warn_psabi)
5797 inform (input_location,
5798 "The ABI of passing structure with complex float"
5799 " member has changed in GCC 4.4");
5801 classes[1] = X86_64_SSESF_CLASS;
5805 classes[0] = X86_64_SSEDF_CLASS;
5806 classes[1] = X86_64_SSEDF_CLASS;
5809 classes[0] = X86_64_COMPLEX_X87_CLASS;
/* These modes are larger than 16 bytes. */
5820 classes[0] = X86_64_SSE_CLASS;
5821 classes[1] = X86_64_SSEUP_CLASS;
5822 classes[2] = X86_64_SSEUP_CLASS;
5823 classes[3] = X86_64_SSEUP_CLASS;
5831 classes[0] = X86_64_SSE_CLASS;
5832 classes[1] = X86_64_SSEUP_CLASS;
5840 classes[0] = X86_64_SSE_CLASS;
5846 gcc_assert (VECTOR_MODE_P (mode));
5851 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5853 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5854 classes[0] = X86_64_INTEGERSI_CLASS;
5856 classes[0] = X86_64_INTEGER_CLASS;
5857 classes[1] = X86_64_INTEGER_CLASS;
5858 return 1 + (bytes > 8);
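/* Worked examples (a hedged sketch; the PS ABI is normative):
   classify_argument yields

     long                          -> 1 word:  INTEGER
     double                        -> 1 word:  SSEDF
     __int128                      -> 2 words: INTEGER, INTEGER
     struct { long a; double b; }  -> 2 words: INTEGER, SSEDF

   while an aggregate larger than 32 bytes yields 0, i.e. memory. */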
/* Examine the argument and return the number of registers required in
   each class. Return 0 iff the parameter should be passed in memory. */
5865 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5866 int *int_nregs, int *sse_nregs)
5868 enum x86_64_reg_class regclass[MAX_CLASSES];
5869 int n = classify_argument (mode, type, regclass, 0);
5875 for (n--; n >= 0; n--)
5876 switch (regclass[n])
5878 case X86_64_INTEGER_CLASS:
5879 case X86_64_INTEGERSI_CLASS:
5882 case X86_64_SSE_CLASS:
5883 case X86_64_SSESF_CLASS:
5884 case X86_64_SSEDF_CLASS:
5887 case X86_64_NO_CLASS:
5888 case X86_64_SSEUP_CLASS:
5890 case X86_64_X87_CLASS:
5891 case X86_64_X87UP_CLASS:
5895 case X86_64_COMPLEX_X87_CLASS:
5896 return in_return ? 2 : 0;
5897 case X86_64_MEMORY_CLASS:
/* Construct the container for the argument used by the GCC interface.
   See FUNCTION_ARG for the detailed description. */
5907 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5908 const_tree type, int in_return, int nintregs, int nsseregs,
5909 const int *intreg, int sse_regno)
5911 /* The following variables hold the static issued_error state. */
5912 static bool issued_sse_arg_error;
5913 static bool issued_sse_ret_error;
5914 static bool issued_x87_ret_error;
5916 enum machine_mode tmpmode;
5918 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5919 enum x86_64_reg_class regclass[MAX_CLASSES];
5923 int needed_sseregs, needed_intregs;
5924 rtx exp[MAX_CLASSES];
5927 n = classify_argument (mode, type, regclass, 0);
5930 if (!examine_argument (mode, type, in_return, &needed_intregs,
5933 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5936 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5937 some less clueful developer tries to use floating-point anyway. */
5938 if (needed_sseregs && !TARGET_SSE)
5942 if (!issued_sse_ret_error)
5944 error ("SSE register return with SSE disabled");
5945 issued_sse_ret_error = true;
5948 else if (!issued_sse_arg_error)
5950 error ("SSE register argument with SSE disabled");
5951 issued_sse_arg_error = true;
5956 /* Likewise, error if the ABI requires us to return values in the
5957 x87 registers and the user specified -mno-80387. */
5958 if (!TARGET_80387 && in_return)
5959 for (i = 0; i < n; i++)
5960 if (regclass[i] == X86_64_X87_CLASS
5961 || regclass[i] == X86_64_X87UP_CLASS
5962 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5964 if (!issued_x87_ret_error)
5966 error ("x87 register return with x87 disabled");
5967 issued_x87_ret_error = true;
/* First construct simple cases. Avoid SCmode, since we want to use a
   single register to pass this type. */
5974 if (n == 1 && mode != SCmode)
5975 switch (regclass[0])
5977 case X86_64_INTEGER_CLASS:
5978 case X86_64_INTEGERSI_CLASS:
5979 return gen_rtx_REG (mode, intreg[0]);
5980 case X86_64_SSE_CLASS:
5981 case X86_64_SSESF_CLASS:
5982 case X86_64_SSEDF_CLASS:
5983 if (mode != BLKmode)
5984 return gen_reg_or_parallel (mode, orig_mode,
5985 SSE_REGNO (sse_regno));
5987 case X86_64_X87_CLASS:
5988 case X86_64_COMPLEX_X87_CLASS:
5989 return gen_rtx_REG (mode, FIRST_STACK_REG);
5990 case X86_64_NO_CLASS:
5991 /* Zero sized array, struct or class. */
5996 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5997 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5998 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6000 && regclass[0] == X86_64_SSE_CLASS
6001 && regclass[1] == X86_64_SSEUP_CLASS
6002 && regclass[2] == X86_64_SSEUP_CLASS
6003 && regclass[3] == X86_64_SSEUP_CLASS
6005 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6008 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6009 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6010 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6011 && regclass[1] == X86_64_INTEGER_CLASS
6012 && (mode == CDImode || mode == TImode || mode == TFmode)
6013 && intreg[0] + 1 == intreg[1])
6014 return gen_rtx_REG (mode, intreg[0]);
6016 /* Otherwise figure out the entries of the PARALLEL. */
6017 for (i = 0; i < n; i++)
6021 switch (regclass[i])
6023 case X86_64_NO_CLASS:
6025 case X86_64_INTEGER_CLASS:
6026 case X86_64_INTEGERSI_CLASS:
6027 /* Merge TImodes on aligned occasions here too. */
6028 if (i * 8 + 8 > bytes)
6029 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6030 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
/* We've requested 24 bytes, which we don't have a mode for. Use DImode. */
6035 if (tmpmode == BLKmode)
6037 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6038 gen_rtx_REG (tmpmode, *intreg),
6042 case X86_64_SSESF_CLASS:
6043 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6044 gen_rtx_REG (SFmode,
6045 SSE_REGNO (sse_regno)),
6049 case X86_64_SSEDF_CLASS:
6050 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6051 gen_rtx_REG (DFmode,
6052 SSE_REGNO (sse_regno)),
6056 case X86_64_SSE_CLASS:
6064 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6074 && regclass[1] == X86_64_SSEUP_CLASS
6075 && regclass[2] == X86_64_SSEUP_CLASS
6076 && regclass[3] == X86_64_SSEUP_CLASS);
6083 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6084 gen_rtx_REG (tmpmode,
6085 SSE_REGNO (sse_regno)),
6094 /* Empty aligned struct, union or class. */
6098 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6099 for (i = 0; i < nexps; i++)
6100 XVECEXP (ret, 0, i) = exp [i];
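/* As a sketch of the result (schematic, not verbatim RTL): for

     struct { long a; double b; }

   passed as an argument, the container is roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte goes in an integer register and the second
   in an SSE register, each tagged with its byte offset. */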
6104 /* Update the data in CUM to advance over an argument of mode MODE
6105 and data type TYPE. (TYPE is null for libcalls where that information
6106 may not be available.) */
6109 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6110 const_tree type, HOST_WIDE_INT bytes,
6111 HOST_WIDE_INT words)
6127 cum->words += words;
6128 cum->nregs -= words;
6129 cum->regno += words;
6131 if (cum->nregs <= 0)
6139 /* OImode shouldn't be used directly. */
6143 if (cum->float_in_sse < 2)
6146 if (cum->float_in_sse < 1)
6163 if (!type || !AGGREGATE_TYPE_P (type))
6165 cum->sse_words += words;
6166 cum->sse_nregs -= 1;
6167 cum->sse_regno += 1;
6168 if (cum->sse_nregs <= 0)
6182 if (!type || !AGGREGATE_TYPE_P (type))
6184 cum->mmx_words += words;
6185 cum->mmx_nregs -= 1;
6186 cum->mmx_regno += 1;
6187 if (cum->mmx_nregs <= 0)
6198 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6199 const_tree type, HOST_WIDE_INT words, bool named)
6201 int int_nregs, sse_nregs;
/* Unnamed 256-bit vector mode parameters are passed on the stack. */
6204 if (!named && VALID_AVX256_REG_MODE (mode))
6207 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6208 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6210 cum->nregs -= int_nregs;
6211 cum->sse_nregs -= sse_nregs;
6212 cum->regno += int_nregs;
6213 cum->sse_regno += sse_nregs;
6217 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6218 cum->words = (cum->words + align - 1) & ~(align - 1);
6219 cum->words += words;
6224 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6225 HOST_WIDE_INT words)
/* Otherwise, this should be passed indirectly. */
6228 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6230 cum->words += words;
6238 /* Update the data in CUM to advance over an argument of mode MODE and
6239 data type TYPE. (TYPE is null for libcalls where that information
6240 may not be available.) */
6243 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6244 const_tree type, bool named)
6246 HOST_WIDE_INT bytes, words;
6248 if (mode == BLKmode)
6249 bytes = int_size_in_bytes (type);
6251 bytes = GET_MODE_SIZE (mode);
6252 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6255 mode = type_natural_mode (type, NULL);
6257 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6258 function_arg_advance_ms_64 (cum, bytes, words);
6259 else if (TARGET_64BIT)
6260 function_arg_advance_64 (cum, mode, type, words, named);
6262 function_arg_advance_32 (cum, mode, type, bytes, words);
6265 /* Define where to put the arguments to a function.
6266 Value is zero to push the argument on the stack,
6267 or a hard register in which to store the argument.
6269 MODE is the argument's machine mode.
6270 TYPE is the data type of the argument (as a tree).
This is null for libcalls where that information may not be available.
6273 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6274 the preceding args and about the function being called.
6275 NAMED is nonzero if this argument is a named parameter
6276 (otherwise it is an extra parameter matching an ellipsis). */
6279 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6280 enum machine_mode orig_mode, const_tree type,
6281 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6283 static bool warnedsse, warnedmmx;
6285 /* Avoid the AL settings for the Unix64 ABI. */
6286 if (mode == VOIDmode)
6302 if (words <= cum->nregs)
6304 int regno = cum->regno;
/* Fastcall allocates the first two DWORD (SImode) or
   smaller arguments to ECX and EDX if it isn't an
   aggregate type. */
6313 || (type && AGGREGATE_TYPE_P (type)))
/* ECX, not EAX, is the first allocated register. */
6317 if (regno == AX_REG)
6320 return gen_rtx_REG (mode, regno);
6325 if (cum->float_in_sse < 2)
6328 if (cum->float_in_sse < 1)
/* In 32-bit mode, we pass TImode in xmm registers. */
6339 if (!type || !AGGREGATE_TYPE_P (type))
6341 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6344 warning (0, "SSE vector argument without SSE enabled "
6348 return gen_reg_or_parallel (mode, orig_mode,
6349 cum->sse_regno + FIRST_SSE_REG);
6354 /* OImode shouldn't be used directly. */
6363 if (!type || !AGGREGATE_TYPE_P (type))
6366 return gen_reg_or_parallel (mode, orig_mode,
6367 cum->sse_regno + FIRST_SSE_REG);
6377 if (!type || !AGGREGATE_TYPE_P (type))
6379 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6382 warning (0, "MMX vector argument without MMX enabled "
6386 return gen_reg_or_parallel (mode, orig_mode,
6387 cum->mmx_regno + FIRST_MMX_REG);
6396 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6397 enum machine_mode orig_mode, const_tree type, bool named)
/* Handle a hidden AL argument containing the number of SSE registers
   used by varargs x86-64 functions. */
6401 if (mode == VOIDmode)
6402 return GEN_INT (cum->maybe_vaarg
6403 ? (cum->sse_nregs < 0
6404 ? X86_64_SSE_REGPARM_MAX
/* Unnamed 256-bit vector mode parameters are passed on the stack. */
6425 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6427 &x86_64_int_parameter_registers [cum->regno],
6432 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6433 enum machine_mode orig_mode, bool named,
6434 HOST_WIDE_INT bytes)
/* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
   We use the value -2 to specify that the current function call is MS_ABI. */
6440 if (mode == VOIDmode)
6441 return GEN_INT (-2);
6443 /* If we've run out of registers, it goes on the stack. */
6444 if (cum->nregs == 0)
6447 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6449 /* Only floating point modes are passed in anything but integer regs. */
6450 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6453 regno = cum->regno + FIRST_SSE_REG;
6458 /* Unnamed floating parameters are passed in both the
6459 SSE and integer registers. */
6460 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6461 t2 = gen_rtx_REG (mode, regno);
6462 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6463 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6464 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
/* Handle aggregate types passed in a register. */
6468 if (orig_mode == BLKmode)
6470 if (bytes > 0 && bytes <= 8)
6471 mode = (bytes > 4 ? DImode : SImode);
6472 if (mode == BLKmode)
6476 return gen_reg_or_parallel (mode, orig_mode, regno);
6479 /* Return where to put the arguments to a function.
Return zero to push the argument on the stack, or a hard register in
   which to store the argument.
6482 MODE is the argument's machine mode. TYPE is the data type of the
6483 argument. It is null for libcalls where that information may not be
6484 available. CUM gives information about the preceding args and about
6485 the function being called. NAMED is nonzero if this argument is a
named parameter (otherwise it is an extra parameter matching an
   ellipsis). */
6490 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6491 const_tree type, bool named)
6493 enum machine_mode mode = omode;
6494 HOST_WIDE_INT bytes, words;
6496 if (mode == BLKmode)
6497 bytes = int_size_in_bytes (type);
6499 bytes = GET_MODE_SIZE (mode);
6500 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6502 /* To simplify the code below, represent vector types with a vector mode
6503 even if MMX/SSE are not active. */
6504 if (type && TREE_CODE (type) == VECTOR_TYPE)
6505 mode = type_natural_mode (type, cum);
6507 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6508 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6509 else if (TARGET_64BIT)
6510 return function_arg_64 (cum, mode, omode, type, named);
6512 return function_arg_32 (cum, mode, omode, type, bytes, words);
6515 /* A C expression that indicates when an argument must be passed by
6516 reference. If nonzero for an argument, a copy of that argument is
6517 made in memory and a pointer to the argument is passed instead of
6518 the argument itself. The pointer is passed in whatever way is
6519 appropriate for passing a pointer to that type. */
6522 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6523 enum machine_mode mode ATTRIBUTE_UNUSED,
6524 const_tree type, bool named ATTRIBUTE_UNUSED)
6526 /* See Windows x64 Software Convention. */
6527 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6529 int msize = (int) GET_MODE_SIZE (mode);
6532 /* Arrays are passed by reference. */
6533 if (TREE_CODE (type) == ARRAY_TYPE)
6536 if (AGGREGATE_TYPE_P (type))
6538 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6539 are passed by reference. */
6540 msize = int_size_in_bytes (type);
6544 /* __m128 is passed by reference. */
6546 case 1: case 2: case 4: case 8:
6552 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
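/* Summarizing the MS ABI logic above (informal, non-normative): values
   of exactly 1, 2, 4 or 8 bytes are passed by value, and everything
   else -- arrays, the 16-byte __m128, odd-sized aggregates and
   variable-sized types -- is passed by reference. For example:

     struct s3 { char c[3]; };  // 3 bytes -> by reference
     struct s8 { double d; };   // 8 bytes -> by value          */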
/* Return true when TYPE should be 128-bit aligned for 32-bit argument
   passing. */
6561 contains_aligned_value_p (const_tree type)
6563 enum machine_mode mode = TYPE_MODE (type);
6564 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6568 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6570 if (TYPE_ALIGN (type) < 128)
6573 if (AGGREGATE_TYPE_P (type))
6575 /* Walk the aggregates recursively. */
6576 switch (TREE_CODE (type))
6580 case QUAL_UNION_TYPE:
6584 /* Walk all the structure fields. */
6585 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6587 if (TREE_CODE (field) == FIELD_DECL
6588 && contains_aligned_value_p (TREE_TYPE (field)))
/* Just in case some languages pass arrays by value. */
6596 if (contains_aligned_value_p (TREE_TYPE (type)))
6607 /* Gives the alignment boundary, in bits, of an argument with the
6608 specified mode and type. */
6611 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
/* Since the main variant type is used for the call, convert the type
   to its main variant. */
6618 type = TYPE_MAIN_VARIANT (type);
6619 align = TYPE_ALIGN (type);
6622 align = GET_MODE_ALIGNMENT (mode);
6623 if (align < PARM_BOUNDARY)
6624 align = PARM_BOUNDARY;
/* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
   natural boundaries. */
6627 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
/* The i386 ABI defines all arguments to be 4-byte aligned. We have to
   make an exception for SSE modes since these require 128-bit
   alignment.

   The handling here differs from field_alignment. ICC aligns MMX
   arguments to 4-byte boundaries, while structure fields are aligned
   to 8-byte boundaries. */
6638 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6639 align = PARM_BOUNDARY;
6643 if (!contains_aligned_value_p (type))
6644 align = PARM_BOUNDARY;
6647 if (align > BIGGEST_ALIGNMENT)
6648 align = BIGGEST_ALIGNMENT;
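/* Example (informal): in 32-bit mode a plain int argument gets
   PARM_BOUNDARY (32 bits), while an __m128 argument, or an aggregate
   containing one, gets a 128-bit boundary via contains_aligned_value_p. */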
/* Return true if REGNO is a possible register number for a function
   value. */
6655 ix86_function_value_regno_p (const unsigned int regno)
6662 case FIRST_FLOAT_REG:
/* TODO: The function should depend on the current function ABI, but
   builtins.c would need updating then. Therefore we use the
   default ABI. */
6666 if (TARGET_64BIT && ix86_abi == MS_ABI)
6668 return TARGET_FLOAT_RETURNS_IN_80387;
6674 if (TARGET_MACHO || TARGET_64BIT)
6682 /* Define how to find the value returned by a function.
6683 VALTYPE is the data type of the value (as a tree).
6684 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6685 otherwise, FUNC is 0. */
6688 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6689 const_tree fntype, const_tree fn)
6693 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6694 we normally prevent this case when mmx is not available. However
6695 some ABIs may require the result to be returned like DImode. */
6696 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6697 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6699 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6700 we prevent this case when sse is not available. However some ABIs
6701 may require the result to be returned like integer TImode. */
6702 else if (mode == TImode
6703 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6704 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6706 /* 32-byte vector modes in %ymm0. */
6707 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6708 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6710 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6711 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6712 regno = FIRST_FLOAT_REG;
6714 /* Most things go in %eax. */
6717 /* Override FP return register with %xmm0 for local functions when
6718 SSE math is enabled or for functions with sseregparm attribute. */
6719 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6721 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6722 if ((sse_level >= 1 && mode == SFmode)
6723 || (sse_level == 2 && mode == DFmode))
6724 regno = FIRST_SSE_REG;
6727 /* OImode shouldn't be used directly. */
6728 gcc_assert (mode != OImode);
6730 return gen_rtx_REG (orig_mode, regno);
6734 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6739 /* Handle libcalls, which don't provide a type node. */
6740 if (valtype == NULL)
6752 return gen_rtx_REG (mode, FIRST_SSE_REG);
6755 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6759 return gen_rtx_REG (mode, AX_REG);
6763 ret = construct_container (mode, orig_mode, valtype, 1,
6764 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6765 x86_64_int_return_registers, 0);
/* For zero-sized structures, construct_container returns NULL, but we
   need to keep the rest of the compiler happy by returning a
   meaningful value. */
6770 ret = gen_rtx_REG (orig_mode, AX_REG);
6776 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6778 unsigned int regno = AX_REG;
6782 switch (GET_MODE_SIZE (mode))
if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6786 && !COMPLEX_MODE_P (mode))
6787 regno = FIRST_SSE_REG;
6791 if (mode == SFmode || mode == DFmode)
6792 regno = FIRST_SSE_REG;
6798 return gen_rtx_REG (orig_mode, regno);
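/* In short (a sketch of the MS ABI cases above): integer-class values
   return in %rax, while SFmode/DFmode scalars and 16-byte
   integer/vector values return in %xmm0. */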
6802 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6803 enum machine_mode orig_mode, enum machine_mode mode)
6805 const_tree fn, fntype;
6808 if (fntype_or_decl && DECL_P (fntype_or_decl))
6809 fn = fntype_or_decl;
6810 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6812 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6813 return function_value_ms_64 (orig_mode, mode);
6814 else if (TARGET_64BIT)
6815 return function_value_64 (orig_mode, mode, valtype);
6817 return function_value_32 (orig_mode, mode, fntype, fn);
6821 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6822 bool outgoing ATTRIBUTE_UNUSED)
6824 enum machine_mode mode, orig_mode;
6826 orig_mode = TYPE_MODE (valtype);
6827 mode = type_natural_mode (valtype, NULL);
6828 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6832 ix86_libcall_value (enum machine_mode mode)
6834 return ix86_function_value_1 (NULL, NULL, mode, mode);
/* Return true iff TYPE is returned in memory. */
6839 static bool ATTRIBUTE_UNUSED
6840 return_in_memory_32 (const_tree type, enum machine_mode mode)
6844 if (mode == BLKmode)
6847 size = int_size_in_bytes (type);
6849 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6852 if (VECTOR_MODE_P (mode) || mode == TImode)
6854 /* User-created vectors small enough to fit in EAX. */
/* MMX/3dNow values are returned in MM0,
   except when it doesn't exist or the ABI prescribes otherwise. */
6861 return !TARGET_MMX || TARGET_VECT8_RETURNS;
6863 /* SSE values are returned in XMM0, except when it doesn't exist. */
6867 /* AVX values are returned in YMM0, except when it doesn't exist. */
6878 /* OImode shouldn't be used directly. */
6879 gcc_assert (mode != OImode);
6884 static bool ATTRIBUTE_UNUSED
6885 return_in_memory_64 (const_tree type, enum machine_mode mode)
6887 int needed_intregs, needed_sseregs;
6888 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6891 static bool ATTRIBUTE_UNUSED
6892 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6894 HOST_WIDE_INT size = int_size_in_bytes (type);
6896 /* __m128 is returned in xmm0. */
6897 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6898 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
/* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6902 return size != 1 && size != 2 && size != 4 && size != 8;
6906 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6908 #ifdef SUBTARGET_RETURN_IN_MEMORY
6909 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6911 const enum machine_mode mode = type_natural_mode (type, NULL);
6915 if (ix86_function_type_abi (fntype) == MS_ABI)
6916 return return_in_memory_ms_64 (type, mode);
6918 return return_in_memory_64 (type, mode);
6921 return return_in_memory_32 (type, mode);
6925 /* When returning SSE vector types, we have a choice of either
(1) being ABI incompatible with a -march switch, or
6927 (2) generating an error.
6928 Given no good solution, I think the safest thing is one warning.
6929 The user won't be able to use -Werror, but....
6931 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6932 called in response to actually generating a caller or callee that
6933 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6934 via aggregate_value_p for general type probing from tree-ssa. */
6937 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6939 static bool warnedsse, warnedmmx;
6941 if (!TARGET_64BIT && type)
6943 /* Look at the return type of the function, not the function type. */
6944 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6946 if (!TARGET_SSE && !warnedsse)
6949 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6952 warning (0, "SSE vector return without SSE enabled "
6957 if (!TARGET_MMX && !warnedmmx)
6959 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6962 warning (0, "MMX vector return without MMX enabled "
6972 /* Create the va_list data type. */
/* Returns the calling-convention-specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6978 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6980 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use a plain pointer to the argument area. */
6983 if (!TARGET_64BIT || abi == MS_ABI)
6984 return build_pointer_type (char_type_node);
6986 record = lang_hooks.types.make_type (RECORD_TYPE);
6987 type_decl = build_decl (BUILTINS_LOCATION,
6988 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6990 f_gpr = build_decl (BUILTINS_LOCATION,
6991 FIELD_DECL, get_identifier ("gp_offset"),
6992 unsigned_type_node);
6993 f_fpr = build_decl (BUILTINS_LOCATION,
6994 FIELD_DECL, get_identifier ("fp_offset"),
6995 unsigned_type_node);
6996 f_ovf = build_decl (BUILTINS_LOCATION,
6997 FIELD_DECL, get_identifier ("overflow_arg_area"),
6999 f_sav = build_decl (BUILTINS_LOCATION,
7000 FIELD_DECL, get_identifier ("reg_save_area"),
7003 va_list_gpr_counter_field = f_gpr;
7004 va_list_fpr_counter_field = f_fpr;
7006 DECL_FIELD_CONTEXT (f_gpr) = record;
7007 DECL_FIELD_CONTEXT (f_fpr) = record;
7008 DECL_FIELD_CONTEXT (f_ovf) = record;
7009 DECL_FIELD_CONTEXT (f_sav) = record;
7011 TREE_CHAIN (record) = type_decl;
7012 TYPE_NAME (record) = type_decl;
7013 TYPE_FIELDS (record) = f_gpr;
7014 DECL_CHAIN (f_gpr) = f_fpr;
7015 DECL_CHAIN (f_fpr) = f_ovf;
7016 DECL_CHAIN (f_ovf) = f_sav;
7018 layout_type (record);
7020 /* The correct type is an array type of one element. */
7021 return build_array_type (record, build_index_type (size_zero_node));
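/* The record built above corresponds roughly to the well-known SysV
   declaration (shown for illustration only):

     typedef struct __va_list_tag {
       unsigned int gp_offset;    // byte offset into reg_save_area, GPRs
       unsigned int fp_offset;    // byte offset into reg_save_area, FPRs
       void *overflow_arg_area;   // next stack-passed argument
       void *reg_save_area;       // base of the register save area
     } __builtin_va_list[1];  */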
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling-convention-specific va_list data types. */
7028 ix86_build_builtin_va_list (void)
7030 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
/* Initialize the ABI-specific va_list builtin types. */
7036 if (ix86_abi == MS_ABI)
7038 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7039 if (TREE_CODE (t) != RECORD_TYPE)
7040 t = build_variant_type_copy (t);
7041 sysv_va_list_type_node = t;
7046 if (TREE_CODE (t) != RECORD_TYPE)
7047 t = build_variant_type_copy (t);
7048 sysv_va_list_type_node = t;
7050 if (ix86_abi != MS_ABI)
7052 t = ix86_build_builtin_va_list_abi (MS_ABI);
7053 if (TREE_CODE (t) != RECORD_TYPE)
7054 t = build_variant_type_copy (t);
7055 ms_va_list_type_node = t;
7060 if (TREE_CODE (t) != RECORD_TYPE)
7061 t = build_variant_type_copy (t);
7062 ms_va_list_type_node = t;
7069 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7072 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7078 /* GPR size of varargs save area. */
7079 if (cfun->va_list_gpr_size)
7080 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7082 ix86_varargs_gpr_size = 0;
7084 /* FPR size of varargs save area. We don't need it if we don't pass
7085 anything in SSE registers. */
7086 if (TARGET_SSE && cfun->va_list_fpr_size)
7087 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7089 ix86_varargs_fpr_size = 0;
7091 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7094 save_area = frame_pointer_rtx;
7095 set = get_varargs_alias_set ();
7097 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7098 if (max > X86_64_REGPARM_MAX)
7099 max = X86_64_REGPARM_MAX;
7101 for (i = cum->regno; i < max; i++)
7103 mem = gen_rtx_MEM (Pmode,
7104 plus_constant (save_area, i * UNITS_PER_WORD));
7105 MEM_NOTRAP_P (mem) = 1;
7106 set_mem_alias_set (mem, set);
7107 emit_move_insn (mem, gen_rtx_REG (Pmode,
7108 x86_64_int_parameter_registers[i]));
7111 if (ix86_varargs_fpr_size)
7113 enum machine_mode smode;
/* Now emit code to save SSE registers. The AX parameter contains the
   number of SSE parameter registers used to call this function, though
   all we actually check here is the zero/non-zero status. */
7120 label = gen_label_rtx ();
7121 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7122 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7125 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7126 we used movdqa (i.e. TImode) instead? Perhaps even better would
7127 be if we could determine the real mode of the data, via a hook
7128 into pass_stdarg. Ignore all that for now. */
7130 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7131 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7133 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7134 if (max > X86_64_SSE_REGPARM_MAX)
7135 max = X86_64_SSE_REGPARM_MAX;
7137 for (i = cum->sse_regno; i < max; ++i)
7139 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7140 mem = gen_rtx_MEM (smode, mem);
7141 MEM_NOTRAP_P (mem) = 1;
7142 set_mem_alias_set (mem, set);
7143 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7145 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7153 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7155 alias_set_type set = get_varargs_alias_set ();
7158 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7162 mem = gen_rtx_MEM (Pmode,
7163 plus_constant (virtual_incoming_args_rtx,
7164 i * UNITS_PER_WORD));
7165 MEM_NOTRAP_P (mem) = 1;
7166 set_mem_alias_set (mem, set);
7168 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7169 emit_move_insn (mem, reg);
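/* Intent (informal): the loop above spills every parameter register
   that may hold an unnamed argument into its home slot in the
   caller-reserved shadow area, so that va_arg can walk all arguments
   uniformly in memory. */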
7174 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7175 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7178 CUMULATIVE_ARGS next_cum;
7181 /* This argument doesn't appear to be used anymore. Which is good,
7182 because the old code here didn't suppress rtl generation. */
7183 gcc_assert (!no_rtl);
7188 fntype = TREE_TYPE (current_function_decl);
7190 /* For varargs, we do not want to skip the dummy va_dcl argument.
7191 For stdargs, we do want to skip the last named argument. */
7193 if (stdarg_p (fntype))
7194 ix86_function_arg_advance (&next_cum, mode, type, true);
7196 if (cum->call_abi == MS_ABI)
7197 setup_incoming_varargs_ms_64 (&next_cum);
7199 setup_incoming_varargs_64 (&next_cum);
/* Check whether TYPE is a va_list of kind char *. */
7205 is_va_list_char_pointer (tree type)
7209 /* For 32-bit it is always true. */
7212 canonic = ix86_canonical_va_list_type (type);
7213 return (canonic == ms_va_list_type_node
7214 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7217 /* Implement va_start. */
7220 ix86_va_start (tree valist, rtx nextarg)
7222 HOST_WIDE_INT words, n_gpr, n_fpr;
7223 tree f_gpr, f_fpr, f_ovf, f_sav;
7224 tree gpr, fpr, ovf, sav, t;
7229 if (flag_split_stack
7230 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7232 unsigned int scratch_regno;
7234 /* When we are splitting the stack, we can't refer to the stack
7235 arguments using internal_arg_pointer, because they may be on
7236 the old stack. The split stack prologue will arrange to
7237 leave a pointer to the old stack arguments in a scratch
7238 register, which we here copy to a pseudo-register. The split
7239 stack prologue can't set the pseudo-register directly because
7240 it (the prologue) runs before any registers have been saved. */
7242 scratch_regno = split_stack_prologue_scratch_regno ();
7243 if (scratch_regno != INVALID_REGNUM)
7247 reg = gen_reg_rtx (Pmode);
7248 cfun->machine->split_stack_varargs_pointer = reg;
7251 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7255 push_topmost_sequence ();
7256 emit_insn_after (seq, entry_of_function ());
7257 pop_topmost_sequence ();
/* Only the 64-bit target needs something special. */
7262 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7264 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7265 std_expand_builtin_va_start (valist, nextarg);
7270 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7271 next = expand_binop (ptr_mode, add_optab,
7272 cfun->machine->split_stack_varargs_pointer,
7273 crtl->args.arg_offset_rtx,
7274 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7275 convert_move (va_r, next, 0);
7280 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7281 f_fpr = DECL_CHAIN (f_gpr);
7282 f_ovf = DECL_CHAIN (f_fpr);
7283 f_sav = DECL_CHAIN (f_ovf);
7285 valist = build_simple_mem_ref (valist);
7286 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7287 /* The following should be folded into the MEM_REF offset. */
7288 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7290 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7292 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7294 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
/* Count the number of gp and fp argument registers used. */
7298 words = crtl->args.info.words;
7299 n_gpr = crtl->args.info.regno;
7300 n_fpr = crtl->args.info.sse_regno;
7302 if (cfun->va_list_gpr_size)
7304 type = TREE_TYPE (gpr);
7305 t = build2 (MODIFY_EXPR, type,
7306 gpr, build_int_cst (type, n_gpr * 8));
7307 TREE_SIDE_EFFECTS (t) = 1;
7308 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7311 if (TARGET_SSE && cfun->va_list_fpr_size)
7313 type = TREE_TYPE (fpr);
7314 t = build2 (MODIFY_EXPR, type, fpr,
7315 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7316 TREE_SIDE_EFFECTS (t) = 1;
7317 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7320 /* Find the overflow area. */
7321 type = TREE_TYPE (ovf);
7322 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7323 ovf_rtx = crtl->args.internal_arg_pointer;
7325 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7326 t = make_tree (type, ovf_rtx);
7328 t = build2 (POINTER_PLUS_EXPR, type, t,
7329 size_int (words * UNITS_PER_WORD));
7330 t = build2 (MODIFY_EXPR, type, ovf, t);
7331 TREE_SIDE_EFFECTS (t) = 1;
7332 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7334 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
/* Find the register save area.
   The function prologue saves it right above the stack frame. */
7338 type = TREE_TYPE (sav);
7339 t = make_tree (type, frame_pointer_rtx);
7340 if (!ix86_varargs_gpr_size)
7341 t = build2 (POINTER_PLUS_EXPR, type, t,
7342 size_int (-8 * X86_64_REGPARM_MAX));
7343 t = build2 (MODIFY_EXPR, type, sav, t);
7344 TREE_SIDE_EFFECTS (t) = 1;
7345 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
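/* After this runs, for a function whose named arguments consumed e.g.
   two GPRs and one SSE register (an illustrative sketch):

     gp_offset = 16;   // 2 * 8
     fp_offset = 64;   // 8 * X86_64_REGPARM_MAX + 1 * 16
     overflow_arg_area = first stack-passed argument;
     reg_save_area     = base of the save area laid out above;  */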
7349 /* Implement va_arg. */
7352 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7355 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7356 tree f_gpr, f_fpr, f_ovf, f_sav;
7357 tree gpr, fpr, ovf, sav, t;
7359 tree lab_false, lab_over = NULL_TREE;
7364 enum machine_mode nat_mode;
7365 unsigned int arg_boundary;
/* Only the 64-bit target needs something special. */
7368 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7369 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7371 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7372 f_fpr = DECL_CHAIN (f_gpr);
7373 f_ovf = DECL_CHAIN (f_fpr);
7374 f_sav = DECL_CHAIN (f_ovf);
7376 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7377 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7378 valist = build_va_arg_indirect_ref (valist);
7379 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7380 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7381 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7383 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7385 type = build_pointer_type (type);
7386 size = int_size_in_bytes (type);
7387 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7389 nat_mode = type_natural_mode (type, NULL);
/* Unnamed 256-bit vector mode parameters are passed on the stack. */
7399 if (ix86_cfun_abi () == SYSV_ABI)
7406 container = construct_container (nat_mode, TYPE_MODE (type),
7407 type, 0, X86_64_REGPARM_MAX,
7408 X86_64_SSE_REGPARM_MAX, intreg,
7413 /* Pull the value out of the saved registers. */
7415 addr = create_tmp_var (ptr_type_node, "addr");
7419 int needed_intregs, needed_sseregs;
7421 tree int_addr, sse_addr;
7423 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7424 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7426 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7428 need_temp = (!REG_P (container)
7429 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7430 || TYPE_ALIGN (type) > 128));
/* If we are passing a structure, verify that it is a consecutive block
   in the register save area. If not, we need to do moves. */
7434 if (!need_temp && !REG_P (container))
/* Verify that all registers are strictly consecutive. */
7437 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7441 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7443 rtx slot = XVECEXP (container, 0, i);
7444 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7445 || INTVAL (XEXP (slot, 1)) != i * 16)
7453 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7455 rtx slot = XVECEXP (container, 0, i);
7456 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7457 || INTVAL (XEXP (slot, 1)) != i * 8)
7469 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7470 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7473 /* First ensure that we fit completely in registers. */
7476 t = build_int_cst (TREE_TYPE (gpr),
7477 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7478 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7479 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7480 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7481 gimplify_and_add (t, pre_p);
7485 t = build_int_cst (TREE_TYPE (fpr),
7486 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7487 + X86_64_REGPARM_MAX * 8);
7488 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7489 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7490 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7491 gimplify_and_add (t, pre_p);
/* Compute the index to the start of the area used for integer regs. */
7497 /* int_addr = gpr + sav; */
7498 t = fold_convert (sizetype, gpr);
7499 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7500 gimplify_assign (int_addr, t, pre_p);
7504 /* sse_addr = fpr + sav; */
7505 t = fold_convert (sizetype, fpr);
7506 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7507 gimplify_assign (sse_addr, t, pre_p);
7511 int i, prev_size = 0;
7512 tree temp = create_tmp_var (type, "va_arg_tmp");
7515 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7516 gimplify_assign (addr, t, pre_p);
7518 for (i = 0; i < XVECLEN (container, 0); i++)
7520 rtx slot = XVECEXP (container, 0, i);
7521 rtx reg = XEXP (slot, 0);
7522 enum machine_mode mode = GET_MODE (reg);
7528 tree dest_addr, dest;
7529 int cur_size = GET_MODE_SIZE (mode);
7531 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7532 prev_size = INTVAL (XEXP (slot, 1));
7533 if (prev_size + cur_size > size)
7535 cur_size = size - prev_size;
7536 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7537 if (mode == BLKmode)
7540 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7541 if (mode == GET_MODE (reg))
7542 addr_type = build_pointer_type (piece_type);
7544 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7546 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7549 if (SSE_REGNO_P (REGNO (reg)))
7551 src_addr = sse_addr;
7552 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7556 src_addr = int_addr;
7557 src_offset = REGNO (reg) * 8;
7559 src_addr = fold_convert (addr_type, src_addr);
7560 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7561 size_int (src_offset));
7563 dest_addr = fold_convert (daddr_type, addr);
7564 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7565 size_int (prev_size));
7566 if (cur_size == GET_MODE_SIZE (mode))
7568 src = build_va_arg_indirect_ref (src_addr);
7569 dest = build_va_arg_indirect_ref (dest_addr);
7571 gimplify_assign (dest, src, pre_p);
7576 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7577 3, dest_addr, src_addr,
7578 size_int (cur_size));
7579 gimplify_and_add (copy, pre_p);
7581 prev_size += cur_size;
7587 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7588 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7589 gimplify_assign (gpr, t, pre_p);
7594 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7595 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7596 gimplify_assign (fpr, t, pre_p);
7599 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7601 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7604 /* ... otherwise out of the overflow area. */
/* When we align a parameter on the stack for the caller, if its
   alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
   aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
   with the caller. */
7610 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7611 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7612 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7614 /* Care for on-stack alignment if needed. */
7615 if (arg_boundary <= 64 || size == 0)
7619 HOST_WIDE_INT align = arg_boundary / 8;
7620 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7621 size_int (align - 1));
7622 t = fold_convert (sizetype, t);
7623 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7625 t = fold_convert (TREE_TYPE (ovf), t);
7628 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7629 gimplify_assign (addr, t, pre_p);
7631 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7632 size_int (rsize * UNITS_PER_WORD));
7633 gimplify_assign (unshare_expr (ovf), t, pre_p);
7636 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7638 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7639 addr = fold_convert (ptrtype, addr);
7642 addr = build_va_arg_indirect_ref (addr);
7643 return build_va_arg_indirect_ref (addr);
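/* Conceptually, the gimple emitted above for a single integer-class
   argument behaves like this pseudo-C (a sketch, not the exact IL):

     if (gpr >= 48)             // all six GPR slots consumed
       goto overflow;
     addr = sav + gpr;
     gpr += 8;
     goto done;
   overflow:
     addr = align (ovf, boundary);
     ovf = addr + rsize * UNITS_PER_WORD;
   done:
     result = *(type *) addr;  */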
7646 /* Return true if OPNUM's MEM should be matched
7647 in movabs* patterns. */
7650 ix86_check_movabs (rtx insn, int opnum)
7654 set = PATTERN (insn);
7655 if (GET_CODE (set) == PARALLEL)
7656 set = XVECEXP (set, 0, 0);
7657 gcc_assert (GET_CODE (set) == SET);
7658 mem = XEXP (set, opnum);
7659 while (GET_CODE (mem) == SUBREG)
7660 mem = SUBREG_REG (mem);
7661 gcc_assert (MEM_P (mem));
7662 return volatile_ok || !MEM_VOLATILE_P (mem);
7665 /* Initialize the table of extra 80387 mathematical constants. */
7668 init_ext_80387_constants (void)
7670 static const char * cst[5] =
7672 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7673 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7674 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7675 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7676 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7680 for (i = 0; i < 5; i++)
7682 real_from_string (&ext_80387_constants_table[i], cst[i]);
7683 /* Ensure each constant is rounded to XFmode precision. */
7684 real_convert (&ext_80387_constants_table[i],
7685 XFmode, &ext_80387_constants_table[i]);
7688 ext_80387_constants_init = 1;
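/* For reference (a summary, assuming the usual encoding used by
   standard_80387_constant_p below): return values 1 and 2 denote fldz
   and fld1, and 3..7 denote the five table entries above, in order
   (fldlg2, fldln2, fldl2e, fldl2t, fldpi). */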
7691 /* Return non-zero if the constant is something that
7692 can be loaded with a special instruction. */
7695 standard_80387_constant_p (rtx x)
7697 enum machine_mode mode = GET_MODE (x);
7701 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7704 if (x == CONST0_RTX (mode))
7706 if (x == CONST1_RTX (mode))
7709 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7711 /* For XFmode constants, try to find a special 80387 instruction when
7712 optimizing for size or on those CPUs that benefit from them. */
7714 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7718 if (! ext_80387_constants_init)
7719 init_ext_80387_constants ();
7721 for (i = 0; i < 5; i++)
7722 if (real_identical (&r, &ext_80387_constants_table[i]))
/* A load of the constant -0.0 or -1.0 will be split into an
   fldz;fchs or fld1;fchs sequence. */
7728 if (real_isnegzero (&r))
7730 if (real_identical (&r, &dconstm1))
/* Return the opcode of the special instruction to be used to load
   the constant X. */
7740 standard_80387_constant_opcode (rtx x)
7742 switch (standard_80387_constant_p (x))
7766 /* Return the CONST_DOUBLE representing the 80387 constant that is
7767 loaded by the specified special instruction. The argument IDX
7768 matches the return value from standard_80387_constant_p. */
7771 standard_80387_constant_rtx (int idx)
7775 if (! ext_80387_constants_init)
7776 init_ext_80387_constants ();
7792 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
/* Return 1 if X is all zeros and 2 if X is all ones,
   in a supported SSE vector mode. */
7800 standard_sse_constant_p (rtx x)
7802 enum machine_mode mode = GET_MODE (x);
7804 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7806 if (vector_all_ones_operand (x, mode))
/* Return the opcode of the special instruction to be used to load
   the constant X. */
7826 standard_sse_constant_opcode (rtx insn, rtx x)
7828 switch (standard_sse_constant_p (x))
7831 switch (get_attr_mode (insn))
7834 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7836 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7837 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7839 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7841 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7842 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7844 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7846 return "vxorps\t%x0, %x0, %x0";
7848 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7849 return "vxorps\t%x0, %x0, %x0";
7851 return "vxorpd\t%x0, %x0, %x0";
7853 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7854 return "vxorps\t%x0, %x0, %x0";
7856 return "vpxor\t%x0, %x0, %x0";
7861 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
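/* Usage note (informal): standard_sse_constant_p returns 1 for the
   all-zeros vector, materialized above with a self-xor, and 2 for the
   all-ones vector, materialized with pcmpeqd of a register against
   itself; both avoid a constant-pool load. */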
/* Return true if OP contains a symbol reference. */
7871 symbolic_reference_mentioned_p (rtx op)
7876 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7879 fmt = GET_RTX_FORMAT (GET_CODE (op));
7880 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7886 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7887 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7891 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7898 /* Return true if it is appropriate to emit `ret' instructions in the
7899 body of a function. Do this only if the epilogue is simple, needing a
7900 couple of insns. Prior to reloading, we can't tell how many registers
7901 must be saved, so return false then. Return false if there is no frame
7902 marker to de-allocate. */
7905 ix86_can_use_return_insn_p (void)
7907 struct ix86_frame frame;
7909 if (! reload_completed || frame_pointer_needed)
7912 /* Don't allow more than 32k pop, since that's all we can do
7913 with one instruction. */
7914 if (crtl->args.pops_args && crtl->args.size >= 32768)
7917 ix86_compute_frame_layout (&frame);
7918 return (frame.stack_pointer_offset == UNITS_PER_WORD
7919 && (frame.nregs + frame.nsseregs) == 0);
7922 /* Value should be nonzero if functions must have frame pointers.
7923 Zero means the frame pointer need not be set up (and parms may
7924 be accessed via the stack pointer) in functions that seem suitable. */
7927 ix86_frame_pointer_required (void)
7929 /* If we accessed previous frames, then the generated code expects
7930 to be able to access the saved ebp value in our frame. */
7931 if (cfun->machine->accesses_prev_frame)
/* Several x86 OSes need a frame pointer for other reasons,
   usually pertaining to setjmp. */
7936 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7939 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7940 turns off the frame pointer by default. Turn it back on now if
7941 we've not got a leaf function. */
7942 if (TARGET_OMIT_LEAF_FRAME_POINTER
7943 && (!current_function_is_leaf
7944 || ix86_current_function_calls_tls_descriptor))
7947 if (crtl->profile && !flag_fentry)
7953 /* Record that the current function accesses previous call frames. */
7956 ix86_setup_frame_addresses (void)
7958 cfun->machine->accesses_prev_frame = 1;
7961 #ifndef USE_HIDDEN_LINKONCE
7962 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7963 # define USE_HIDDEN_LINKONCE 1
7965 # define USE_HIDDEN_LINKONCE 0
7969 static int pic_labels_used;
7971 /* Fills in the label name that should be used for a pc thunk for
7972 the given register. */
7975 get_pc_thunk_name (char name[32], unsigned int regno)
7977 gcc_assert (!TARGET_64BIT);
7979 if (USE_HIDDEN_LINKONCE)
7980 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7982 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7986 /* This function generates code for -fpic that loads %ebx with
7987 the return address of the caller and then returns. */
7990 ix86_code_end (void)
7995 for (regno = AX_REG; regno <= SP_REG; regno++)
8000 if (!(pic_labels_used & (1 << regno)))
8003 get_pc_thunk_name (name, regno);
8005 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8006 get_identifier (name),
8007 build_function_type (void_type_node, void_list_node));
8008 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8009 NULL_TREE, void_type_node);
8010 TREE_PUBLIC (decl) = 1;
8011 TREE_STATIC (decl) = 1;
8016 switch_to_section (darwin_sections[text_coal_section]);
8017 fputs ("\t.weak_definition\t", asm_out_file);
8018 assemble_name (asm_out_file, name);
8019 fputs ("\n\t.private_extern\t", asm_out_file);
8020 assemble_name (asm_out_file, name);
8021 putc ('\n', asm_out_file);
8022 ASM_OUTPUT_LABEL (asm_out_file, name);
8023 DECL_WEAK (decl) = 1;
8027 if (USE_HIDDEN_LINKONCE)
8029 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8031 targetm.asm_out.unique_section (decl, 0);
8032 switch_to_section (get_named_section (decl, NULL, 0));
8034 targetm.asm_out.globalize_label (asm_out_file, name);
8035 fputs ("\t.hidden\t", asm_out_file);
8036 assemble_name (asm_out_file, name);
8037 putc ('\n', asm_out_file);
8038 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8042 switch_to_section (text_section);
8043 ASM_OUTPUT_LABEL (asm_out_file, name);
8046 DECL_INITIAL (decl) = make_node (BLOCK);
8047 current_function_decl = decl;
8048 init_function_start (decl);
8049 first_function_block_is_cold = false;
8050 /* Make sure unwind info is emitted for the thunk if needed. */
8051 final_start_function (emit_barrier (), asm_out_file, 1);
8053 /* Pad stack IP move with 4 instructions (two NOPs count
8054 as one instruction). */
8055 if (TARGET_PAD_SHORT_FUNCTION)
8060 fputs ("\tnop\n", asm_out_file);
8063 xops[0] = gen_rtx_REG (Pmode, regno);
8064 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8065 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8066 fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);

  current_function_decl = NULL;

  if (flag_split_stack)
    file_end_indicate_split_stack ();

/* Emit code for the SET_GOT patterns.  */
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
  if (TARGET_VXWORKS_RTP && flag_pic)
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

      output_asm_insn ("call\t%a2", xops);
#ifdef DWARF2_UNWIND_INFO
      /* The call to the next label acts as a push.  */
      if (dwarf2out_do_frame ())
          insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                         gen_rtx_PLUS (Pmode,
          RTX_FRAME_RELATED_P (insn) = 1;
          dwarf2out_frame_debug (insn, true);

      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      output_asm_insn ("pop%z0\t%0", xops);
#ifdef DWARF2_UNWIND_INFO
      /* The pop is a pop and clobbers dest, but doesn't restore it
         for unwind info purposes.  */
      if (dwarf2out_do_frame ())
          insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
          dwarf2out_frame_debug (insn, true);
          insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                         gen_rtx_PLUS (Pmode,
          RTX_FRAME_RELATED_P (insn) = 1;
          dwarf2out_frame_debug (insn, true);

  get_pc_thunk_name (name, REGNO (dest));
  pic_labels_used |= 1 << REGNO (dest);

#ifdef DWARF2_UNWIND_INFO
  /* Ensure all queued register saves are flushed before the
  if (dwarf2out_do_frame ())
    dwarf2out_flush_queued_reg_saves ();

  xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
  xops[2] = gen_rtx_MEM (QImode, xops[2]);
  output_asm_insn ("call\t%X2", xops);
  /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
     is what will be referenced by the Mach-O PIC subsystem.  */
    ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

  targetm.asm_out.internal_label (asm_out_file, "L",
                                  CODE_LABEL_NUMBER (label));

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
    output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
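/* Illustrative 32-bit output (AT&T syntax) for the non-thunk PIC case
   above, loading %ebx:
       call .L2
   .L2: popl %ebx
        addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
   After the add, the PIC register holds the address of the GOT.  */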
/* Generate a "push" pattern for input ARG.  */
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_PRE_DEC (Pmode,
                                       stack_pointer_rtx)),

/* Generate a "pop" pattern for input ARG.  */
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_POST_INC (Pmode,
                                        stack_pointer_rtx)));
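/* Illustrative RTL shape of the two patterns above, assuming a 32-bit
   target where Pmode == SImode:
     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))    ;; push
     (set (reg:SI arg) (mem:SI (post_inc:SI (reg:SI sp))))   ;; pop  */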
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */
ix86_select_alt_pic_regnum (void)
  if (current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
      /* Can't use the same register for both PIC and DRAP.  */
        drap = REGNO (crtl->drap_reg);
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))

  return INVALID_REGNUM;

/* Return 1 if we need to save REGNO.  */
ix86_save_reg (unsigned int regno, int maybe_eh_return)
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->calls_eh_return
          || crtl->uses_const_pool))
    if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)

  if (crtl->calls_eh_return && maybe_eh_return)
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));

/* Return the number of saved general purpose registers.  */
ix86_nsaved_regs (void)
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))

/* Return the number of saved SSE registers.  */
ix86_nsaved_sseregs (void)
  if (ix86_cfun_abi () != MS_ABI)
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace the argument
   pointer with the hard frame pointer, or replace the frame pointer with
   the stack pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */
ix86_can_eliminate (const int from, const int to)
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;

/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */
ix86_initial_elimination_offset (int from, int to)
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
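/* Illustrative example with hypothetical offsets: if the computed layout
   gave hard_frame_pointer_offset == 8, frame_pointer_offset == 24 and
   stack_pointer_offset == 56, then eliminating ARG_POINTER to
   HARD_FRAME_POINTER would return 8, FRAME_POINTER to HARD_FRAME_POINTER
   would return 8 - 24 == -16, and FRAME_POINTER to STACK_POINTER would
   return 56 - 24 == 32.  */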
/* In a dynamically-aligned function, we can't know the offset from
   the stack pointer to the frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
ix86_builtin_setjmp_frame_value (void)
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;

/* On x86, -fsplit-stack and -fstack-protector both use the same
   field in the TCB, so they cannot be used together.  */
ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
    error ("%<-fsplit-stack%> currently only supported on GNU/Linux");

  if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
      error ("%<-fsplit-stack%> requires "
             "assembler support for CFI directives");

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
#define SPLIT_STACK_AVAILABLE 256

/* Fill structure ix86_frame describing the frame of the currently
   computed function.  */
ix86_compute_frame_layout (struct ix86_frame *frame)
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The MS ABI seems to require stack alignment to always be 16, except
     for function prologues and leaf functions.  */
  if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* During a reload iteration the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload makes multiple calls to this
     function and does not expect the decision to change within a single
     iteration.  */
  if (!optimize_function_for_size_p (cfun)
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses moves instead of pushes to save registers.
         This is significantly longer, but also executes faster, as modern
         hardware can execute the moves in parallel but cannot do that for
         push/pop.

         Be careful about choosing which prologue to emit: when the function
         takes many instructions to execute, we may use the slow version, as
         well as when the function is known to be outside a hot spot (this
         is known with feedback only).  Weight the size of the function by
         the number of registers to save, as it is cheap to use one or two
         push instructions but very slow to use many of them.  */
      count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);

  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
    frame->save_regs_using_mov = false;

  /* If static stack checking is enabled and done with probes, the registers
     need to be saved before allocating the frame.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    frame->save_regs_using_mov = false;

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;

  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align the start of the frame for the local function.  */
  offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
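  /* Illustrative arithmetic for the rounding idiom above: with
     stack_alignment_needed == 16 and offset == 52,
     (52 + 16 - 1) & -16 yields 64, i.e. the offset is rounded up to the
     next multiple of the required alignment.  */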
  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is, however, impossible when the function calls alloca.
     The alloca expander assumes that the last crtl->outgoing_args_size
     of the stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached the end of the stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size the prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
  /* EBP and R13 cannot be encoded without an offset.  */
  len = (regno == BP_REG || regno == R13_REG);

  else if (IN_RANGE (offset, -128, 127))

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
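/* Illustrative encodings, following the standard x86 ModRM/SIB rules the
   function mirrors: (%eax) needs no displacement byte; (%ebp) must be
   encoded as 0(%ebp), costing one byte even for offset 0; an offset in
   [-128, 127] takes a 1-byte disp8 and a larger one a 4-byte disp32; and
   (%esp) always needs an extra SIB byte.  */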
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */
choose_baseaddr (HOST_WIDE_INT cfa_offset)
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */
        base_reg = hard_frame_pointer_rtx;
        base_offset = m->fs.fp_offset - cfa_offset;
      else if (m->fs.drap_valid)
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
      else if (m->fs.sp_valid)
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;

      HOST_WIDE_INT toffset;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
        base_reg = stack_pointer_rtx;
        base_offset = m->fs.sp_offset - cfa_offset;
        len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);

      if (m->fs.drap_valid)
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
            base_reg = crtl->drap_reg;
            base_offset = toffset;

          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
            base_reg = hard_frame_pointer_rtx;
            base_offset = toffset;

  gcc_assert (base_reg != NULL);

  return plus_constant (base_reg, base_offset);
/* Emit code to save registers in the prologue.  */
ix86_emit_save_regs (void)
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;

/* Emit a single register save at CFA - CFA_OFFSET.  */
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);

          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
      addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));

/* Emit code to save registers using MOV insns.
   The first register is stored at CFA - CFA_OFFSET.  */
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
        ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;

/* Emit code to save SSE registers using MOV insns.
   The first register is stored at CFA - CFA_OFFSET.  */
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);

static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red zone until return, as unwinders can find the same
   value in the register and on the stack.  */
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
  if (cfa_offset <= cfun->machine->fs.red_zone_offset)
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);

/* Add queued REG_CFA_RESTORE notes to INSN, if any.  */
ix86_add_queued_cfa_restore_notes (rtx insn)
  if (!queued_cfa_restores)
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
                           int style, bool set_cfa)
  struct machine_function *m = cfun->machine;

    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);

      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  */
        tmp = gen_rtx_REG (DImode, R11_REG);
          gcc_assert (src != hard_frame_pointer_rtx
                      && dest != hard_frame_pointer_rtx);
          tmp = hard_frame_pointer_rtx;
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
        RTX_FRAME_RELATED_P (insn) = 1;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);

  insn = emit_insn (insn);
    ix86_add_queued_cfa_restore_notes (insn);

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    RTX_FRAME_RELATED_P (insn) = 1;

  if (dest == stack_pointer_rtx)
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
          valid = m->fs.fp_valid;
          ooffset = m->fs.fp_offset;
      else if (src == crtl->drap_reg)
          valid = m->fs.drap_valid;
          /* Else there are two possibilities: SP itself, which we set
             up as the default above, or EH_RETURN_STACKADJ_RTX, which is
             taken care of by hand along the eh_return path.  */
          gcc_checking_assert (src == stack_pointer_rtx
                               || offset == const0_rtx);

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
   1. a parameter-passing register.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   Return: the regno of the chosen register.  */
find_drap_reg (void)
  tree decl = cfun->decl;

      /* Use R13 for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         registers in the epilogue, the DRAP must not use a caller-saved
         register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)

      /* Use DI for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         registers in the epilogue, the DRAP must not use a caller-saved
         register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)

      /* Reuse the static chain register if it isn't used for parameter
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
          && !lookup_attribute ("fastcall",
                                TYPE_ATTRIBUTES (TREE_TYPE (decl)))
          && !lookup_attribute ("thiscall",
                                TYPE_ATTRIBUTES (TREE_TYPE (decl))))

/* Return minimum incoming stack alignment.  */
ix86_minimum_incoming_stack_boundary (bool sibcall)
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used, this isn't used for the sibcall check, and
     the estimated stack alignment is 128 bits.  */
      && ix86_force_align_arg_pointer
      && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via the force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* The stack at the entrance of main is aligned by the runtime.  We use
     the smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;

/* Update incoming stack boundary and estimated stack alignment.  */
ix86_update_stack_boundary (void)
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 varargs needs 16-byte stack alignment for the register save
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;

/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed, or an rtx for the DRAP otherwise.  */
ix86_get_drap_rtx (void)
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      drap_vreg = copy_to_reg (arg_ptr);

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
ix86_internal_arg_pointer (void)
  return virtual_incoming_args_rtx;

struct scratch_reg {

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */
get_scratch_register_on_entry (struct scratch_reg *sr)
  /* We always use R11 in 64-bit mode.  */
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
         for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
      else if (regparm < 2 && drap_regno != DX_REG)
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
               && drap_regno != CX_REG)
      else if (ix86_save_reg (BX_REG, true))
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
      else if (ix86_save_reg (DI_REG, true))
        regno = (drap_regno == AX_REG ? DX_REG : AX_REG);

  sr->reg = gen_rtx_REG (Pmode, regno);
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;

/* Release a scratch register obtained from the preceding function.  */
release_scratch_register_on_entry (struct scratch_reg *sr)
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
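  /* Illustrative cost arithmetic: with 3 intervals the unrolled sequence
     is 3 + 2*(3-1) == 7 insns versus 11 for the run-time loop, while at
     5 intervals it is 3 + 2*4 == 11 insns, the break-even point used for
     the cutoff below.  */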
  if (size <= 5 * PROBE_INTERVAL)
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it exceeds SIZE.  If only one probe is
         needed, this will not generate any code.  Then adjust and probe
         to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
              adjust = 2 * PROBE_INTERVAL + dope;
              first_probe = false;
              adjust = PROBE_INTERVAL;

          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx, -adjust)));
          emit_stack_probe (stack_pointer_rtx);

        adjust = size + PROBE_INTERVAL + dope;
        adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx, -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx,
                                             PROBE_INTERVAL + dope)));

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */
      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx,
                                             - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
                              gen_rtx_PLUS (Pmode, sr.reg,
                                            stack_pointer_rtx)));

         while (SP != LAST_ADDR)
             SP = SP + PROBE_INTERVAL

         adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));

      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
         assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */
      if (size != rounded_size)
          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx,
                                                 rounded_size - size)));
          emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx,
                                             PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());

/* Adjust the stack pointer up to REG while probing it.  */
output_adjust_stack_and_probe (rtx reg)
  static int labelno = 0;
  char loop_lab[32], end_lab[32];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
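/* Illustrative output of the loop above, assuming PROBE_INTERVAL == 4096
   and the scratch register (LAST_ADDR) in %eax (AT&T syntax, 32-bit):
   LPSRL0:  cmpl %eax, %esp
            je   LPSRE0
            subl $4096, %esp
            orl  $0, (%esp)
            jmp  LPSRL0
   LPSRE0:
   Each "or" touches the newly exposed page so the OS can grow the stack.  */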
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
  if (size <= 7 * PROBE_INTERVAL)
      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));

      emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */
      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;

         while (TEST_ADDR != LAST_ADDR)
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */
      if (size != rounded_size)
        emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());

/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */
output_probe_stack_range (rtx reg, rtx end)
  static int labelno = 0;
  char loop_lab[32], end_lab[32];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

/* Finalize the stack_realign_needed flag, which will guide the prologue
   and epilogue to be generated in the correct form.  */
ix86_finalize_stack_realign_flags (void)
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
                                < (current_function_is_leaf
                                   ? crtl->max_used_stack_slot_alignment
                                   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
      /* After stack_realign_needed is finalized, we can no longer
      gcc_assert (crtl->stack_realign_needed == stack_realign);

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
/* Expand the prologue into a bunch of separate insns.  */
ix86_expand_prologue (void)
  struct machine_function *m = cfun->machine;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
      /* We should have already generated an error for any use of
         ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the
         profiling-before-prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
        sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
         8b ff     movl.s %edi,%edi
         8b ec     movl.s %esp,%ebp

         This matches the hookable function prologue in Win32 API
         functions in Microsoft Windows XP Service Pack 2 and newer.
         Wine uses this to enable Windows apps to hook the Win32 API
         functions provided by Wine.

         What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
          && !(crtl->drap_reg && crtl->stack_realign_needed))
          /* We've decided to use the frame pointer already set up.
             Describe this to the unwinder by pretending that both
             push and mov insns happen right here.

             Putting the unwind info here at the end of the ms_hook
             is done so that we can make absolutely certain we get
             the required byte sequence at the start of the function,
             rather than relying on an assembler that can produce
             the exact encoding required.

             However it does mean (in the unpatched case) that we have
             a 1 insn window where the asynchronous unwind info is
             incorrect.  However, if we placed the unwind info at
             its correct location we would have incorrect unwind info
             in the patched case.  Which is probably all moot since
             I don't expect Wine generates dwarf2 unwind info for the
             system libraries that use this feature.  */

          insn = emit_insn (gen_blockage ());

          push = gen_push (hard_frame_pointer_rtx);
          mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
          RTX_FRAME_RELATED_P (push) = 1;
          RTX_FRAME_RELATED_P (mov) = 1;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

          /* Note that gen_push incremented m->fs.cfa_offset, even
             though we didn't emit the push insn here.  */
          m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.cfa_offset;
          m->fs.fp_valid = true;

          /* The frame pointer is not needed so pop %ebp again.
             This leaves us with a pristine state.  */
          emit_insn (gen_pop (hard_frame_pointer_rtx));

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
         only as a stack adjustment.  The real copy of the register as
         a save will be done later, if needed.  */
      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
  /* Emit prologue code to adjust stack alignment and set up the DRAP,
     in case DRAP is needed and stack realignment is really needed
     after reload.  */
  if (stack_realign_drap)
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push the parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
          /* Push arg pointer reg */
          insn = emit_insn (gen_push (crtl->drap_reg));
          RTX_FRAME_RELATED_P (insn) = 1;

      /* Grab the argument pointer.  */
      t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that the return
         address can be reached via (argp - 1) slot.  This is needed
         to implement the macro RETURN_ADDR_RTX and intrinsic functions
         such as expand_builtin_return_addr.  */
      t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (Pmode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
         we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;

  if (frame_pointer_needed && !m->fs.fp_valid)
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
         slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_reg = hard_frame_pointer_rtx;
      gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
      m->fs.fp_offset = m->fs.sp_offset;
      m->fs.fp_valid = true;

  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);

      /* When using the red zone we may start register saving before
         allocating the stack frame, saving one cycle of the prologue.
         However, avoid doing this if we have to probe the stack; at least
         on x86_64 the stack probe can turn into a call that clobbers a
         red zone location.  */
      else if (ix86_using_red_zone ()
               && (! TARGET_STACK_PROBE
                   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
          ix86_emit_save_regs_using_mov (frame.reg_save_offset);
          int_registers_saved = true;

  if (stack_realign_fp)
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
         that we must allocate the size of the register save area before
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (m->fs.sp_offset
                                            - frame.sse_reg_save_offset),

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
         pointer is no longer valid.  As for the value of sp_offset,
         see ix86_compute_frame_layout, which we need to match in order
         to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage)
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
          if (ix86_static_chain_on_stack)
            stack_size += UNITS_PER_WORD;

          if (!call_used_regs[REGNO (crtl->drap_reg)])
            stack_size += UNITS_PER_WORD;

          /* This over-estimates by 1 minimal-stack-alignment-unit but
             mitigates that by counting in the new return address slot.  */
          current_function_dynamic_stack_size
            += crtl->stack_alignment_needed / BITS_PER_UNIT;

      current_function_static_stack_size = stack_size;

  /* The stack has already been decremented by the instruction calling us,
     so we need to probe unconditionally to preserve the protection area.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
          ix86_adjust_stack_and_probe (allocate);

          HOST_WIDE_INT size = allocate;

          if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
            size = 0x80000000 - STACK_CHECK_PROTECT - 1;

          if (TARGET_STACK_PROBE)
            ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
            ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);

  else if (!ix86_target_stack_probe ()
           || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-allocate), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);

      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx (*adjust_stack_insn)(rtx, rtx, rtx);
      bool eax_live = false;
      bool r10_live = false;

        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
        eax_live = ix86_eax_live_at_start_p ();

          emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;

          r10 = gen_rtx_REG (Pmode, R10_REG);
          emit_insn (gen_push (r10));
          allocate -= UNITS_PER_WORD;

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (TARGET_64BIT
                           ? gen_pro_epilogue_adjust_stack_di_sub
                           : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
                                           stack_pointer_rtx, eax));

      if (m->fs.cfa_reg == stack_pointer_rtx)
          m->fs.cfa_offset += allocate;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (r10, gen_frame_mem (Pmode, t));
          t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
          emit_move_insn (eax, gen_frame_mem (Pmode, t));
      else if (eax_live || r10_live)
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));

  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;

      if (ix86_cmodel == CM_LARGE_PIC)
          rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
          rtx label = gen_label_rtx ();

          LABEL_PRESERVE_P (label) = 1;
          gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
          insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
          insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
          insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                        pic_offset_table_rtx, tmp_reg));
        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
      /* vDRAP is set up, but after reload it turns out that stack
         realignment isn't necessary; here we emit the prologue to set up
         the DRAP without the stack-realignment adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));

  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end of
     the function prologue, and moving instructions that access the redzone
     area via the frame pointer inside the push sequence violates this
     assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit a cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());
/* Emit code to restore REG using a POP insn.  */
ix86_emit_restore_reg_using_pop (rtx reg)
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;

  if (m->fs.cfa_reg == stack_pointer_rtx)
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset one word.  */
  if (reg == hard_frame_pointer_rtx)
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                      GEN_INT (m->fs.cfa_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;

/* Emit code to restore saved registers using POP insns.  */
ix86_emit_restore_regs_using_pop (void)
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));

/* Emit code and notes for the LEAVE instruction.  */
ix86_emit_leave (void)
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
/* Emit code to restore saved registers using MOV insns.
   The first register is restored from CFA - CFA_OFFSET.  */
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  int maybe_eh_return)
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
        rtx reg = gen_rtx_REG (Pmode, regno);

        mem = choose_baseaddr (cfa_offset);
        mem = gen_frame_mem (Pmode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;

          ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;

/* Emit code to restore saved SSE registers using MOV insns.
   The first register is restored from CFA - CFA_OFFSET.  */
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      int maybe_eh_return)
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
        rtx reg = gen_rtx_REG (V4SFmode, regno);

        mem = choose_baseaddr (cfa_offset);
        mem = gen_rtx_MEM (V4SFmode, mem);
        set_mem_align (mem, 128);
        emit_move_insn (reg, mem);

        ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10058 /* Restore function stack, frame, and registers. */
10061 ix86_expand_epilogue (int style)
10063 struct machine_function *m = cfun->machine;
10064 struct machine_frame_state frame_state_save = m->fs;
10065 struct ix86_frame frame;
10066 bool restore_regs_via_mov;
10069 ix86_finalize_stack_realign_flags ();
10070 ix86_compute_frame_layout (&frame);
10072 m->fs.sp_valid = (!frame_pointer_needed
10073 || (current_function_sp_is_unchanging
10074 && !stack_realign_fp));
10075 gcc_assert (!m->fs.sp_valid
10076 || m->fs.sp_offset == frame.stack_pointer_offset);
10078 /* The FP must be valid if the frame pointer is present. */
10079 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10080 gcc_assert (!m->fs.fp_valid
10081 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10083 /* We must have *some* valid pointer to the stack frame. */
10084 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10086 /* The DRAP is never valid at this point. */
10087 gcc_assert (!m->fs.drap_valid);
10089 /* See the comment about red zone and frame
10090 pointer usage in ix86_expand_prologue. */
10091 if (frame_pointer_needed && frame.red_zone_size)
10092 emit_insn (gen_memory_blockage ());
10094 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10095 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10097 /* Determine the CFA offset of the end of the red-zone. */
10098 m->fs.red_zone_offset = 0;
10099 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10101 /* The red-zone begins below the return address. */
10102 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10104 /* When the register save area is in the aligned portion of
10105 the stack, determine the maximum runtime displacement that
10106 matches up with the aligned frame. */
10107 if (stack_realign_drap)
10108 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10112 /* Special care must be taken for the normal return case of a function
10113 using eh_return: the eax and edx registers are marked as saved, but
10114 not restored along this path. Adjust the save location to match. */
10115 if (crtl->calls_eh_return && style != 2)
10116 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* If we're only restoring one register and sp is not valid, then
   use a move instruction to restore the register, since it's
   less work than reloading sp and popping the register.  */
10121 if (!m->fs.sp_valid && frame.nregs <= 1)
10122 restore_regs_via_mov = true;
10123 /* EH_RETURN requires the use of moves to function properly. */
10124 else if (crtl->calls_eh_return)
10125 restore_regs_via_mov = true;
10126 else if (TARGET_EPILOGUE_USING_MOVE
10127 && cfun->machine->use_fast_prologue_epilogue
10128 && (frame.nregs > 1
10129 || m->fs.sp_offset != frame.reg_save_offset))
10130 restore_regs_via_mov = true;
10131 else if (frame_pointer_needed
10133 && m->fs.sp_offset != frame.reg_save_offset)
10134 restore_regs_via_mov = true;
10135 else if (frame_pointer_needed
10136 && TARGET_USE_LEAVE
10137 && cfun->machine->use_fast_prologue_epilogue
10138 && frame.nregs == 1)
10139 restore_regs_via_mov = true;
10141 restore_regs_via_mov = false;
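/* A sketch of the trade-off decided above, with hypothetical
   offsets.  Restoring via pops walks the stack pointer through the
   save area:

       addl  $16, %esp
       popl  %esi
       popl  %edi

   Restoring via moves leaves %esp alone until one final adjustment,
   shortening the dependency chain on the stack pointer:

       movl  16(%esp), %esi
       movl  20(%esp), %edi
       addl  $24, %esp  */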
10143 if (restore_regs_via_mov || frame.nsseregs)
10145 /* Ensure that the entire register save area is addressable via
the stack pointer, if we will restore via sp.  */
if (TARGET_64BIT
10148 && m->fs.sp_offset > 0x7fffffff
10149 && !(m->fs.fp_valid || m->fs.drap_valid)
10150 && (frame.nsseregs + frame.nregs) != 0)
10152 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10153 GEN_INT (m->fs.sp_offset
10154 - frame.sse_reg_save_offset),
10156 m->fs.cfa_reg == stack_pointer_rtx);
10160 /* If there are any SSE registers to restore, then we have to do it
10161 via moves, since there's obviously no pop for SSE regs. */
10162 if (frame.nsseregs)
10163 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10166 if (restore_regs_via_mov)
10171 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10173 /* eh_return epilogues need %ecx added to the stack pointer. */
10176 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10178 /* Stack align doesn't work with eh_return. */
10179 gcc_assert (!stack_realign_drap);
/* Neither do regparm nested functions.  */
10181 gcc_assert (!ix86_static_chain_on_stack);
10183 if (frame_pointer_needed)
10185 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10186 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10187 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10189 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10190 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10192 /* Note that we use SA as a temporary CFA, as the return
10193 address is at the proper place relative to it. We
10194 pretend this happens at the FP restore insn because
10195 prior to this insn the FP would be stored at the wrong
10196 offset relative to SA, and after this insn we have no
10197 other reasonable register to use for the CFA. We don't
10198 bother resetting the CFA to the SP for the duration of
10199 the return insn. */
10200 add_reg_note (insn, REG_CFA_DEF_CFA,
10201 plus_constant (sa, UNITS_PER_WORD));
10202 ix86_add_queued_cfa_restore_notes (insn);
10203 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10204 RTX_FRAME_RELATED_P (insn) = 1;
10206 m->fs.cfa_reg = sa;
10207 m->fs.cfa_offset = UNITS_PER_WORD;
10208 m->fs.fp_valid = false;
10210 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10211 const0_rtx, style, false);
10215 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10216 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10217 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10218 ix86_add_queued_cfa_restore_notes (insn);
10220 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10221 if (m->fs.cfa_offset != UNITS_PER_WORD)
10223 m->fs.cfa_offset = UNITS_PER_WORD;
10224 add_reg_note (insn, REG_CFA_DEF_CFA,
10225 plus_constant (stack_pointer_rtx,
10227 RTX_FRAME_RELATED_P (insn) = 1;
10230 m->fs.sp_offset = UNITS_PER_WORD;
10231 m->fs.sp_valid = true;
10236 /* First step is to deallocate the stack frame so that we can
10237 pop the registers. */
10238 if (!m->fs.sp_valid)
10240 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10241 GEN_INT (m->fs.fp_offset
10242 - frame.reg_save_offset),
10245 else if (m->fs.sp_offset != frame.reg_save_offset)
10247 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10248 GEN_INT (m->fs.sp_offset
10249 - frame.reg_save_offset),
10251 m->fs.cfa_reg == stack_pointer_rtx);
10254 ix86_emit_restore_regs_using_pop ();
/* If we used a frame pointer and haven't already got rid of it,
   then do so now.  */
10259 if (m->fs.fp_valid)
10261 /* If the stack pointer is valid and pointing at the frame
10262 pointer store address, then we only need a pop. */
10263 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10264 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* The "leave" insn results in shorter dependency chains on CPUs
   that are able to execute it fast.  */
10267 else if (TARGET_USE_LEAVE
10268 || optimize_function_for_size_p (cfun)
10269 || !cfun->machine->use_fast_prologue_epilogue)
10270 ix86_emit_leave ();
10273 pro_epilogue_adjust_stack (stack_pointer_rtx,
10274 hard_frame_pointer_rtx,
10275 const0_rtx, style, !using_drap);
10276 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10282 int param_ptr_offset = UNITS_PER_WORD;
10285 gcc_assert (stack_realign_drap);
10287 if (ix86_static_chain_on_stack)
10288 param_ptr_offset += UNITS_PER_WORD;
10289 if (!call_used_regs[REGNO (crtl->drap_reg)])
10290 param_ptr_offset += UNITS_PER_WORD;
10292 insn = emit_insn (gen_rtx_SET
10293 (VOIDmode, stack_pointer_rtx,
10294 gen_rtx_PLUS (Pmode,
10296 GEN_INT (-param_ptr_offset))));
10297 m->fs.cfa_reg = stack_pointer_rtx;
10298 m->fs.cfa_offset = param_ptr_offset;
10299 m->fs.sp_offset = param_ptr_offset;
10300 m->fs.realigned = false;
10302 add_reg_note (insn, REG_CFA_DEF_CFA,
10303 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10304 GEN_INT (param_ptr_offset)));
10305 RTX_FRAME_RELATED_P (insn) = 1;
10307 if (!call_used_regs[REGNO (crtl->drap_reg)])
10308 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10311 /* At this point the stack pointer must be valid, and we must have
10312 restored all of the registers. We may not have deallocated the
10313 entire stack frame. We've delayed this until now because it may
10314 be possible to merge the local stack deallocation with the
10315 deallocation forced by ix86_static_chain_on_stack. */
10316 gcc_assert (m->fs.sp_valid);
10317 gcc_assert (!m->fs.fp_valid);
10318 gcc_assert (!m->fs.realigned);
10319 if (m->fs.sp_offset != UNITS_PER_WORD)
10321 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10322 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10326 /* Sibcall epilogues don't want a return instruction. */
10329 m->fs = frame_state_save;
10333 if (crtl->args.pops_args && crtl->args.size)
10335 rtx popc = GEN_INT (crtl->args.pops_args);
/* i386 can only pop 64K bytes.  If asked to pop more, pop the return
   address, do an explicit add, and jump indirectly to the caller.  */
10340 if (crtl->args.pops_args >= 65536)
10342 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10345 /* There is no "pascal" calling convention in any 64bit ABI. */
10346 gcc_assert (!TARGET_64BIT);
10348 insn = emit_insn (gen_pop (ecx));
10349 m->fs.cfa_offset -= UNITS_PER_WORD;
10350 m->fs.sp_offset -= UNITS_PER_WORD;
10352 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10353 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10354 add_reg_note (insn, REG_CFA_REGISTER,
10355 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10356 RTX_FRAME_RELATED_P (insn) = 1;
10358 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10360 emit_jump_insn (gen_return_indirect_internal (ecx));
10363 emit_jump_insn (gen_return_pop_internal (popc));
10366 emit_jump_insn (gen_return_internal ());
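/* For illustration of the return paths above, with hypothetical
   operand values: a function popping 12 bytes of arguments returns
   with

       ret  $12

   but ret only takes a 16-bit immediate, so a 100000-byte case goes
   through the indirect path:

       popl  %ecx
       addl  $100000, %esp
       jmp   *%ecx  */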
/* Restore the frame state back to what it was in the prologue, so
   that it's correct for the next epilogue.  */
10370 m->fs = frame_state_save;
10373 /* Reset from the function's potential modifications. */
10376 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10377 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10379 if (pic_offset_table_rtx)
10380 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10382 /* Mach-O doesn't support labels at the end of objects, so if
10383 it looks like we might want one, insert a NOP. */
10385 rtx insn = get_last_insn ();
10388 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10389 insn = PREV_INSN (insn);
10393 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10394 fputs ("\tnop\n", file);
10400 /* Return a scratch register to use in the split stack prologue. The
split stack prologue is used for -fsplit-stack.  It consists of the first
instructions in the function, even before the regular prologue.
10403 The scratch register can be any caller-saved register which is not
10404 used for parameters or for the static chain. */
10406 static unsigned int
10407 split_stack_prologue_scratch_regno (void)
10416 is_fastcall = (lookup_attribute ("fastcall",
10417 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10419 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10423 if (DECL_STATIC_CHAIN (cfun->decl))
sorry ("-fsplit-stack does not support fastcall with "
"a nested function");
10427 return INVALID_REGNUM;
10431 else if (regparm < 3)
10433 if (!DECL_STATIC_CHAIN (cfun->decl))
sorry ("-fsplit-stack does not support 2 register "
"parameters for a nested function");
10441 return INVALID_REGNUM;
10448 /* FIXME: We could make this work by pushing a register
10449 around the addition and comparison. */
10450 sorry ("-fsplit-stack does not support 3 register parameters");
10451 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */
10459 static GTY(()) rtx split_stack_fn;
10461 /* Handle -fsplit-stack. These are the first instructions in the
10462 function, even before the regular prologue. */
10465 ix86_expand_split_stack_prologue (void)
10467 struct ix86_frame frame;
10468 HOST_WIDE_INT allocate;
10470 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10471 rtx scratch_reg = NULL_RTX;
10472 rtx varargs_label = NULL_RTX;
10474 gcc_assert (flag_split_stack && reload_completed);
10476 ix86_finalize_stack_realign_flags ();
10477 ix86_compute_frame_layout (&frame);
10478 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10480 /* This is the label we will branch to if we have enough stack
10481 space. We expect the basic block reordering pass to reverse this
10482 branch if optimizing, so that we branch in the unlikely case. */
10483 label = gen_label_rtx ();
10485 /* We need to compare the stack pointer minus the frame size with
10486 the stack boundary in the TCB. The stack boundary always gives
10487 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10488 can compare directly. Otherwise we need to do an addition. */
10490 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10491 UNSPEC_STACK_CHECK);
10492 limit = gen_rtx_CONST (Pmode, limit);
10493 limit = gen_rtx_MEM (Pmode, limit);
10494 if (allocate < SPLIT_STACK_AVAILABLE)
10495 current = stack_pointer_rtx;
10498 unsigned int scratch_regno;
10501 /* We need a scratch register to hold the stack pointer minus
10502 the required frame size. Since this is the very start of the
10503 function, the scratch register can be any caller-saved
10504 register which is not used for parameters. */
10505 offset = GEN_INT (- allocate);
10506 scratch_regno = split_stack_prologue_scratch_regno ();
10507 if (scratch_regno == INVALID_REGNUM)
10509 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10510 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10512 /* We don't use ix86_gen_add3 in this case because it will
10513 want to split to lea, but when not optimizing the insn
10514 will not be split after this point. */
10515 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10516 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10521 emit_move_insn (scratch_reg, offset);
10522 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10523 stack_pointer_rtx));
10525 current = scratch_reg;
10528 ix86_expand_branch (GEU, current, limit, label);
10529 jump_insn = get_last_insn ();
10530 JUMP_LABEL (jump_insn) = label;
10532 /* Mark the jump as very likely to be taken. */
10533 add_reg_note (jump_insn, REG_BR_PROB,
10534 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
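/* Worked out: REG_BR_PROB_BASE is 10000, so the note encodes
   10000 - 10000/100 = 9900, i.e. a 99% probability that the branch to
   LABEL (enough stack available) is taken.  */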
10536 /* Get more stack space. We pass in the desired stack space and the
10537 size of the arguments to copy to the new stack. In 32-bit mode
10538 we push the parameters; __morestack will return on a new stack
anyhow.  In 64-bit mode we pass the parameters in r10 and r11.  */
10541 allocate_rtx = GEN_INT (allocate);
10542 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10543 call_fusage = NULL_RTX;
10548 reg = gen_rtx_REG (Pmode, R10_REG);
10550 /* If this function uses a static chain, it will be in %r10.
10551 Preserve it across the call to __morestack. */
10552 if (DECL_STATIC_CHAIN (cfun->decl))
10556 rax = gen_rtx_REG (Pmode, AX_REG);
10557 emit_move_insn (rax, reg);
10558 use_reg (&call_fusage, rax);
10561 emit_move_insn (reg, allocate_rtx);
10562 use_reg (&call_fusage, reg);
10563 reg = gen_rtx_REG (Pmode, R11_REG);
10564 emit_move_insn (reg, GEN_INT (args_size));
10565 use_reg (&call_fusage, reg);
10569 emit_insn (gen_push (GEN_INT (args_size)));
10570 emit_insn (gen_push (allocate_rtx));
10572 if (split_stack_fn == NULL_RTX)
10573 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10574 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10575 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10577 add_function_usage_to (call_insn, call_fusage);
10579 /* In order to make call/return prediction work right, we now need
10580 to execute a return instruction. See
10581 libgcc/config/i386/morestack.S for the details on how this works.
10583 For flow purposes gcc must not see this as a return
10584 instruction--we need control flow to continue at the subsequent
10585 label. Therefore, we use an unspec. */
10586 gcc_assert (crtl->args.pops_args < 65536);
10587 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10589 /* If we are in 64-bit mode and this function uses a static chain,
we saved %r10 in %rax before calling __morestack.  */
10591 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10592 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10593 gen_rtx_REG (Pmode, AX_REG));
10595 /* If this function calls va_start, we need to store a pointer to
10596 the arguments on the old stack, because they may not have been
10597 all copied to the new stack. At this point the old stack can be
10598 found at the frame pointer value used by __morestack, because
10599 __morestack has set that up before calling back to us. Here we
10600 store that pointer in a scratch register, and in
ix86_expand_prologue we store the scratch register in a stack
slot.  */
10603 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10605 unsigned int scratch_regno;
10609 scratch_regno = split_stack_prologue_scratch_regno ();
10610 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10611 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* The frame pointer set up by __morestack points at a frame laid
   out, in 64-bit mode, as:

     saved frame pointer
     return address within this function
     return address of caller of this function

   so we add three words to get to the stack arguments.  In 32-bit
   mode the layout is:

     saved frame pointer
     return address within this function
     first argument to __morestack
     second argument to __morestack
     return address of caller of this function

   so we add five words to get to the stack arguments.  */
10629 words = TARGET_64BIT ? 3 : 5;
10630 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10631 gen_rtx_PLUS (Pmode, frame_reg,
10632 GEN_INT (words * UNITS_PER_WORD))));
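/* Concretely: 3 * UNITS_PER_WORD = 24 bytes above the frame pointer
   in 64-bit mode, and 5 * UNITS_PER_WORD = 20 bytes in 32-bit mode,
   per the frame layouts sketched above.  */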
10634 varargs_label = gen_label_rtx ();
10635 emit_jump_insn (gen_jump (varargs_label));
10636 JUMP_LABEL (get_last_insn ()) = varargs_label;
10641 emit_label (label);
10642 LABEL_NUSES (label) = 1;
10644 /* If this function calls va_start, we now have to set the scratch
10645 register for the case where we do not call __morestack. In this
10646 case we need to set it based on the stack pointer. */
10647 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10649 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10650 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10651 GEN_INT (UNITS_PER_WORD))));
10653 emit_label (varargs_label);
10654 LABEL_NUSES (varargs_label) = 1;
10658 /* We may have to tell the dataflow pass that the split stack prologue
10659 is initializing a scratch register. */
10662 ix86_live_on_entry (bitmap regs)
10664 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10666 gcc_assert (flag_split_stack);
10667 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of the lea
   instruction.  */
10677 ix86_decompose_address (rtx addr, struct ix86_address *out)
10679 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10680 rtx base_reg, index_reg;
10681 HOST_WIDE_INT scale = 1;
10682 rtx scale_rtx = NULL_RTX;
10685 enum ix86_address_seg seg = SEG_DEFAULT;
10687 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10689 else if (GET_CODE (addr) == PLUS)
10691 rtx addends[4], op;
10699 addends[n++] = XEXP (op, 1);
10702 while (GET_CODE (op) == PLUS);
10707 for (i = n; i >= 0; --i)
10710 switch (GET_CODE (op))
10715 index = XEXP (op, 0);
10716 scale_rtx = XEXP (op, 1);
10722 index = XEXP (op, 0);
10723 tmp = XEXP (op, 1);
10724 if (!CONST_INT_P (tmp))
10726 scale = INTVAL (tmp);
10727 if ((unsigned HOST_WIDE_INT) scale > 3)
10729 scale = 1 << scale;
10733 if (XINT (op, 1) == UNSPEC_TP
10734 && TARGET_TLS_DIRECT_SEG_REFS
10735 && seg == SEG_DEFAULT)
10736 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10765 else if (GET_CODE (addr) == MULT)
10767 index = XEXP (addr, 0); /* index*scale */
10768 scale_rtx = XEXP (addr, 1);
10770 else if (GET_CODE (addr) == ASHIFT)
10772 /* We're called for lea too, which implements ashift on occasion. */
10773 index = XEXP (addr, 0);
10774 tmp = XEXP (addr, 1);
10775 if (!CONST_INT_P (tmp))
10777 scale = INTVAL (tmp);
10778 if ((unsigned HOST_WIDE_INT) scale > 3)
10780 scale = 1 << scale;
10784 disp = addr; /* displacement */
10786 /* Extract the integral value of scale. */
10789 if (!CONST_INT_P (scale_rtx))
10791 scale = INTVAL (scale_rtx);
10794 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10795 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10797 /* Avoid useless 0 displacement. */
10798 if (disp == const0_rtx && (base || index))
/* Allow arg pointer and stack pointer as index if there is no scaling.  */
10802 if (base_reg && index_reg && scale == 1
10803 && (index_reg == arg_pointer_rtx
10804 || index_reg == frame_pointer_rtx
10805 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10808 tmp = base, base = index, index = tmp;
10809 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
/* Special case: %ebp cannot be encoded as a base without a displacement.
   Similarly %r13.  */
if (!disp
&& base_reg
10816 && (base_reg == hard_frame_pointer_rtx
10817 || base_reg == frame_pointer_rtx
10818 || base_reg == arg_pointer_rtx
10819 || (REG_P (base_reg)
10820 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10821 || REGNO (base_reg) == R13_REG))))
10824 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10825 Avoid this by transforming to [%esi+0].
10826 Reload calls address legitimization without cfun defined, so we need
10827 to test cfun for being non-NULL. */
10828 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10829 && base_reg && !index_reg && !disp
10830 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10833 /* Special case: encode reg+reg instead of reg*2. */
10834 if (!base && index && scale == 2)
10835 base = index, base_reg = index_reg, scale = 1;
10837 /* Special case: scaling cannot be encoded without base or displacement. */
10838 if (!base && !disp && index && scale != 1)
10842 out->index = index;
10844 out->scale = scale;
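/* A worked example (hypothetical RTL, not produced by this function):
   the address

       (plus (plus (mult (reg B) (const_int 4)) (reg A))
             (const_int 12))

   decomposes into base = A, index = B, scale = 4, disp = 12, which is
   the operand printed as 12(%eax,%ebx,4) in AT&T syntax when A is
   %eax and B is %ebx.  */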
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
10856 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10858 struct ix86_address parts;
10860 int ok = ix86_decompose_address (x, &parts);
10864 if (parts.base && GET_CODE (parts.base) == SUBREG)
10865 parts.base = SUBREG_REG (parts.base);
10866 if (parts.index && GET_CODE (parts.index) == SUBREG)
10867 parts.index = SUBREG_REG (parts.index);
10869 /* Attempt to minimize number of registers in the address. */
10871 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10873 && (!REG_P (parts.index)
10874 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10878 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10880 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10881 && parts.base != parts.index)
/* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
   since its predecode logic can't detect the length of instructions
   and it degenerates to vector decoding.  Increase the cost of such
   addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
   to split such addresses or even refuse them entirely.

   The following addressing modes are affected:
    [base+scale*index]
    [scale*index+disp]
    [base+index]

   The first and last case may be avoidable by explicitly coding the zero
   in the memory address, but I don't have an AMD-K6 machine handy to check
   this theory.  */

if (TARGET_K6
10900 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10901 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10902 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
10913 darwin_local_data_pic (rtx disp)
10915 return (GET_CODE (disp) == UNSPEC
10916 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10919 /* Determine if a given RTX is a valid constant. We already know this
10920 satisfies CONSTANT_P. */
10923 legitimate_constant_p (rtx x)
10925 switch (GET_CODE (x))
10930 if (GET_CODE (x) == PLUS)
10932 if (!CONST_INT_P (XEXP (x, 1)))
10937 if (TARGET_MACHO && darwin_local_data_pic (x))
10940 /* Only some unspecs are valid as "constants". */
10941 if (GET_CODE (x) == UNSPEC)
10942 switch (XINT (x, 1))
10945 case UNSPEC_GOTOFF:
10946 case UNSPEC_PLTOFF:
10947 return TARGET_64BIT;
10949 case UNSPEC_NTPOFF:
10950 x = XVECEXP (x, 0, 0);
10951 return (GET_CODE (x) == SYMBOL_REF
10952 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10953 case UNSPEC_DTPOFF:
10954 x = XVECEXP (x, 0, 0);
10955 return (GET_CODE (x) == SYMBOL_REF
10956 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10961 /* We must have drilled down to a symbol. */
10962 if (GET_CODE (x) == LABEL_REF)
10964 if (GET_CODE (x) != SYMBOL_REF)
10969 /* TLS symbols are never valid. */
10970 if (SYMBOL_REF_TLS_MODEL (x))
10973 /* DLLIMPORT symbols are never valid. */
10974 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10975 && SYMBOL_REF_DLLIMPORT_P (x))
10980 if (GET_MODE (x) == TImode
10981 && x != CONST0_RTX (TImode)
10987 if (!standard_sse_constant_p (x))
10994 /* Otherwise we handle everything else in the move patterns. */
10998 /* Determine if it's legal to put X into the constant pool. This
10999 is not possible for the address of thread-local symbols, which
11000 is checked above. */
11003 ix86_cannot_force_const_mem (rtx x)
11005 /* We can always put integral constants and vectors in memory. */
11006 switch (GET_CODE (x))
11016 return !legitimate_constant_p (x);
11020 /* Nonzero if the constant value X is a legitimate general operand
11021 when generating PIC code. It is given that flag_pic is on and
11022 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11025 legitimate_pic_operand_p (rtx x)
11029 switch (GET_CODE (x))
11032 inner = XEXP (x, 0);
11033 if (GET_CODE (inner) == PLUS
11034 && CONST_INT_P (XEXP (inner, 1)))
11035 inner = XEXP (inner, 0);
11037 /* Only some unspecs are valid as "constants". */
11038 if (GET_CODE (inner) == UNSPEC)
11039 switch (XINT (inner, 1))
11042 case UNSPEC_GOTOFF:
11043 case UNSPEC_PLTOFF:
11044 return TARGET_64BIT;
11046 x = XVECEXP (inner, 0, 0);
11047 return (GET_CODE (x) == SYMBOL_REF
11048 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11049 case UNSPEC_MACHOPIC_OFFSET:
11050 return legitimate_pic_address_disp_p (x);
11058 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
11069 legitimate_pic_address_disp_p (rtx disp)
11073 /* In 64bit mode we can allow direct addresses of symbols and labels
11074 when they are not dynamic symbols. */
11077 rtx op0 = disp, op1;
11079 switch (GET_CODE (disp))
11085 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11087 op0 = XEXP (XEXP (disp, 0), 0);
11088 op1 = XEXP (XEXP (disp, 0), 1);
11089 if (!CONST_INT_P (op1)
11090 || INTVAL (op1) >= 16*1024*1024
11091 || INTVAL (op1) < -16*1024*1024)
11093 if (GET_CODE (op0) == LABEL_REF)
11095 if (GET_CODE (op0) != SYMBOL_REF)
11100 /* TLS references should always be enclosed in UNSPEC. */
11101 if (SYMBOL_REF_TLS_MODEL (op0))
11103 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11104 && ix86_cmodel != CM_LARGE_PIC)
11112 if (GET_CODE (disp) != CONST)
11114 disp = XEXP (disp, 0);
/* It is unsafe to allow PLUS expressions; that would permit arbitrary
   displacements into the GOT tables, and we should not need these
   anyway.  */
11120 if (GET_CODE (disp) != UNSPEC
11121 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11122 && XINT (disp, 1) != UNSPEC_GOTOFF
11123 && XINT (disp, 1) != UNSPEC_PLTOFF))
11126 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11127 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11133 if (GET_CODE (disp) == PLUS)
11135 if (!CONST_INT_P (XEXP (disp, 1)))
11137 disp = XEXP (disp, 0);
11141 if (TARGET_MACHO && darwin_local_data_pic (disp))
11144 if (GET_CODE (disp) != UNSPEC)
11147 switch (XINT (disp, 1))
11152 /* We need to check for both symbols and labels because VxWorks loads
text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
details.  */
11155 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11156 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11157 case UNSPEC_GOTOFF:
/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   While the ABI also specifies a 32bit relocation, we don't produce it in
   the small PIC model at all.  */
11161 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11162 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11164 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11166 case UNSPEC_GOTTPOFF:
11167 case UNSPEC_GOTNTPOFF:
11168 case UNSPEC_INDNTPOFF:
11171 disp = XVECEXP (disp, 0, 0);
11172 return (GET_CODE (disp) == SYMBOL_REF
11173 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11174 case UNSPEC_NTPOFF:
11175 disp = XVECEXP (disp, 0, 0);
11176 return (GET_CODE (disp) == SYMBOL_REF
11177 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11178 case UNSPEC_DTPOFF:
11179 disp = XVECEXP (disp, 0, 0);
11180 return (GET_CODE (disp) == SYMBOL_REF
11181 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11187 /* Recognizes RTL expressions that are valid memory addresses for an
11188 instruction. The MODE argument is the machine mode for the MEM
11189 expression that wants to use this address.
It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
convert common non-canonical forms to canonical form so that they will
be recognized.  */
11196 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11197 rtx addr, bool strict)
11199 struct ix86_address parts;
11200 rtx base, index, disp;
11201 HOST_WIDE_INT scale;
11203 if (ix86_decompose_address (addr, &parts) <= 0)
11204 /* Decomposition failed. */
11208 index = parts.index;
11210 scale = parts.scale;
11212 /* Validate base register.
11214 Don't allow SUBREG's that span more than a word here. It can lead to spill
11215 failures when the base is one word out of a two word structure, which is
11216 represented internally as a DImode int. */
11224 else if (GET_CODE (base) == SUBREG
11225 && REG_P (SUBREG_REG (base))
11226 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11228 reg = SUBREG_REG (base);
11230 /* Base is not a register. */
11233 if (GET_MODE (base) != Pmode)
11234 /* Base is not in Pmode. */
11237 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11238 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11239 /* Base is not valid. */
11243 /* Validate index register.
11245 Don't allow SUBREG's that span more than a word here -- same as above. */
11253 else if (GET_CODE (index) == SUBREG
11254 && REG_P (SUBREG_REG (index))
11255 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11257 reg = SUBREG_REG (index);
11259 /* Index is not a register. */
11262 if (GET_MODE (index) != Pmode)
11263 /* Index is not in Pmode. */
11266 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11267 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11268 /* Index is not valid. */
11272 /* Validate scale factor. */
11276 /* Scale without index. */
11279 if (scale != 2 && scale != 4 && scale != 8)
11280 /* Scale is not a valid multiplier. */
11284 /* Validate displacement. */
11287 if (GET_CODE (disp) == CONST
11288 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11289 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11290 switch (XINT (XEXP (disp, 0), 1))
/* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
   used.  While the ABI also specifies 32bit relocations, we don't produce
   them at all and use IP-relative addressing instead.  */
11296 case UNSPEC_GOTOFF:
11297 gcc_assert (flag_pic);
11299 goto is_legitimate_pic;
11301 /* 64bit address unspec. */
11304 case UNSPEC_GOTPCREL:
11305 gcc_assert (flag_pic);
11306 goto is_legitimate_pic;
11308 case UNSPEC_GOTTPOFF:
11309 case UNSPEC_GOTNTPOFF:
11310 case UNSPEC_INDNTPOFF:
11311 case UNSPEC_NTPOFF:
11312 case UNSPEC_DTPOFF:
11315 case UNSPEC_STACK_CHECK:
11316 gcc_assert (flag_split_stack);
11320 /* Invalid address unspec. */
11324 else if (SYMBOLIC_CONST (disp)
11328 && MACHOPIC_INDIRECT
11329 && !machopic_operand_p (disp)
11335 if (TARGET_64BIT && (index || base))
11337 /* foo@dtpoff(%rX) is ok. */
11338 if (GET_CODE (disp) != CONST
11339 || GET_CODE (XEXP (disp, 0)) != PLUS
11340 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11341 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11342 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11343 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11344 /* Non-constant pic memory reference. */
11347 else if (! legitimate_pic_address_disp_p (disp))
11348 /* Displacement is an invalid pic construct. */
/* This code used to verify that a symbolic pic displacement
   includes the pic_offset_table_rtx register.

   While this is a good idea, unfortunately these constructs may
   be created by the "adds using lea" optimization for incorrect
   code.  Such code is nonsensical, but results in addressing the
   GOT table with a pic_offset_table_rtx base.  We can't
   just refuse it easily, since it gets matched by
   the "addsi3" pattern, which later gets split to lea in the
   case the output register differs from the input.  While this
   could be handled by a separate addsi pattern for this case
   that never results in lea, it seems easier and correct
   to fix the crash by simply disabling this test.  */
11373 else if (GET_CODE (disp) != LABEL_REF
11374 && !CONST_INT_P (disp)
11375 && (GET_CODE (disp) != CONST
11376 || !legitimate_constant_p (disp))
11377 && (GET_CODE (disp) != SYMBOL_REF
11378 || !legitimate_constant_p (disp)))
11379 /* Displacement is not constant. */
11381 else if (TARGET_64BIT
11382 && !x86_64_immediate_operand (disp, VOIDmode))
11383 /* Displacement is out of range. */
11387 /* Everything looks valid. */
11391 /* Determine if a given RTX is a valid constant address. */
11394 constant_address_p (rtx x)
11396 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11399 /* Return a unique alias set for the GOT. */
11401 static alias_set_type
11402 ix86_GOT_alias_set (void)
11404 static alias_set_type set = -1;
11406 set = new_alias_set ();
11410 /* Return a legitimate reference for ORIG (an address) using the
11411 register REG. If REG is 0, a new pseudo is generated.
11413 There are two types of references that must be handled:
11415 1. Global data references must load the address from the GOT, via
11416 the PIC reg. An insn is emitted to do this load, and the reg is
11419 2. Static data references, constant pool addresses, and code labels
11420 compute the address as an offset from the GOT, whose base is in
11421 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11422 differentiate them from global data objects. The returned
11423 address is the PIC reg + an unspec constant.
11425 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11426 reg also appears in the address. */
11429 legitimize_pic_address (rtx orig, rtx reg)
11432 rtx new_rtx = orig;
11436 if (TARGET_MACHO && !TARGET_64BIT)
11439 reg = gen_reg_rtx (Pmode);
11440 /* Use the generic Mach-O PIC machinery. */
11441 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11445 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11447 else if (TARGET_64BIT
11448 && ix86_cmodel != CM_SMALL_PIC
11449 && gotoff_operand (addr, Pmode))
11452 /* This symbol may be referenced via a displacement from the PIC
11453 base address (@GOTOFF). */
11455 if (reload_in_progress)
11456 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11457 if (GET_CODE (addr) == CONST)
11458 addr = XEXP (addr, 0);
11459 if (GET_CODE (addr) == PLUS)
11461 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11463 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11466 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11467 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11469 tmpreg = gen_reg_rtx (Pmode);
11472 emit_move_insn (tmpreg, new_rtx);
11476 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11477 tmpreg, 1, OPTAB_DIRECT);
11480 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11482 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11484 /* This symbol may be referenced via a displacement from the PIC
11485 base address (@GOTOFF). */
11487 if (reload_in_progress)
11488 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11489 if (GET_CODE (addr) == CONST)
11490 addr = XEXP (addr, 0);
11491 if (GET_CODE (addr) == PLUS)
11493 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11495 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11498 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11499 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11500 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11504 emit_move_insn (reg, new_rtx);
11508 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11509 /* We can't use @GOTOFF for text labels on VxWorks;
11510 see gotoff_operand. */
11511 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11513 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11515 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11516 return legitimize_dllimport_symbol (addr, true);
11517 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11518 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11519 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11521 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11522 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11526 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11528 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11529 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11530 new_rtx = gen_const_mem (Pmode, new_rtx);
11531 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11534 reg = gen_reg_rtx (Pmode);
/* Use gen_movsi directly; otherwise the address is loaded
   into a register for CSE.  We don't want to CSE these addresses;
   instead we CSE addresses from the GOT table, so skip this.  */
11538 emit_insn (gen_movsi (reg, new_rtx));
11543 /* This symbol must be referenced via a load from the
11544 Global Offset Table (@GOT). */
11546 if (reload_in_progress)
11547 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11548 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11549 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11551 new_rtx = force_reg (Pmode, new_rtx);
11552 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11553 new_rtx = gen_const_mem (Pmode, new_rtx);
11554 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11557 reg = gen_reg_rtx (Pmode);
11558 emit_move_insn (reg, new_rtx);
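/* For illustration, the two 32-bit PIC forms handled here and in the
   @GOTOFF cases above (register choices hypothetical):

       leal  sym@GOTOFF(%ebx), %eax
       movl  sym@GOT(%ebx), %eax

   @GOTOFF is pure address arithmetic against the PIC register, while
   @GOT dereferences a slot in the global offset table.  */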
11564 if (CONST_INT_P (addr)
11565 && !x86_64_immediate_operand (addr, VOIDmode))
11569 emit_move_insn (reg, addr);
11573 new_rtx = force_reg (Pmode, addr);
11575 else if (GET_CODE (addr) == CONST)
11577 addr = XEXP (addr, 0);
11579 /* We must match stuff we generate before. Assume the only
11580 unspecs that can get here are ours. Not that we could do
11581 anything with them anyway.... */
11582 if (GET_CODE (addr) == UNSPEC
11583 || (GET_CODE (addr) == PLUS
11584 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11586 gcc_assert (GET_CODE (addr) == PLUS);
11588 if (GET_CODE (addr) == PLUS)
11590 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11592 /* Check first to see if this is a constant offset from a @GOTOFF
11593 symbol reference. */
11594 if (gotoff_operand (op0, Pmode)
11595 && CONST_INT_P (op1))
11599 if (reload_in_progress)
11600 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11601 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11603 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11604 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11605 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11609 emit_move_insn (reg, new_rtx);
11615 if (INTVAL (op1) < -16*1024*1024
11616 || INTVAL (op1) >= 16*1024*1024)
11618 if (!x86_64_immediate_operand (op1, Pmode))
11619 op1 = force_reg (Pmode, op1);
11620 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11626 base = legitimize_pic_address (XEXP (addr, 0), reg);
11627 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11628 base == reg ? NULL_RTX : reg);
11630 if (CONST_INT_P (new_rtx))
11631 new_rtx = plus_constant (base, INTVAL (new_rtx));
11634 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11636 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11637 new_rtx = XEXP (new_rtx, 1);
11639 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11647 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11650 get_thread_pointer (int to_reg)
11654 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11658 reg = gen_reg_rtx (Pmode);
11659 insn = gen_rtx_SET (VOIDmode, reg, tp);
11660 insn = emit_insn (insn);
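/* For reference: the UNSPEC_TP built above ends up as an access
   through the thread-pointer segment, e.g. on GNU/Linux (a sketch):

       movl  %gs:0, %eax        (32-bit)
       movq  %fs:0, %rax        (64-bit)

   matching the SEG_GS/SEG_FS handling of UNSPEC_TP in
   ix86_decompose_address when TARGET_TLS_DIRECT_SEG_REFS.  */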
11665 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11666 false if we expect this to be used for a memory address and true if
11667 we expect to load the address into a register. */
11670 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11672 rtx dest, base, off, pic, tp;
11677 case TLS_MODEL_GLOBAL_DYNAMIC:
11678 dest = gen_reg_rtx (Pmode);
11679 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11681 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11683 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11686 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11687 insns = get_insns ();
11690 RTL_CONST_CALL_P (insns) = 1;
11691 emit_libcall_block (insns, dest, rax, x);
11693 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11694 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11696 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11698 if (TARGET_GNU2_TLS)
11700 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11702 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11706 case TLS_MODEL_LOCAL_DYNAMIC:
11707 base = gen_reg_rtx (Pmode);
11708 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11710 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11712 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11715 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11716 insns = get_insns ();
11719 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11720 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11721 RTL_CONST_CALL_P (insns) = 1;
11722 emit_libcall_block (insns, base, rax, note);
11724 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11725 emit_insn (gen_tls_local_dynamic_base_64 (base));
11727 emit_insn (gen_tls_local_dynamic_base_32 (base));
11729 if (TARGET_GNU2_TLS)
11731 rtx x = ix86_tls_module_base ();
11733 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11734 gen_rtx_MINUS (Pmode, x, tp));
11737 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11738 off = gen_rtx_CONST (Pmode, off);
11740 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11742 if (TARGET_GNU2_TLS)
11744 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11746 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11751 case TLS_MODEL_INITIAL_EXEC:
11755 type = UNSPEC_GOTNTPOFF;
11759 if (reload_in_progress)
11760 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11761 pic = pic_offset_table_rtx;
11762 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11764 else if (!TARGET_ANY_GNU_TLS)
11766 pic = gen_reg_rtx (Pmode);
11767 emit_insn (gen_set_got (pic));
11768 type = UNSPEC_GOTTPOFF;
11773 type = UNSPEC_INDNTPOFF;
11776 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11777 off = gen_rtx_CONST (Pmode, off);
11779 off = gen_rtx_PLUS (Pmode, pic, off);
11780 off = gen_const_mem (Pmode, off);
11781 set_mem_alias_set (off, ix86_GOT_alias_set ());
11783 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11785 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11786 off = force_reg (Pmode, off);
11787 return gen_rtx_PLUS (Pmode, base, off);
11791 base = get_thread_pointer (true);
11792 dest = gen_reg_rtx (Pmode);
11793 emit_insn (gen_subsi3 (dest, base, off));
11797 case TLS_MODEL_LOCAL_EXEC:
11798 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11799 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11800 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11801 off = gen_rtx_CONST (Pmode, off);
11803 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11805 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11806 return gen_rtx_PLUS (Pmode, base, off);
11810 base = get_thread_pointer (true);
11811 dest = gen_reg_rtx (Pmode);
11812 emit_insn (gen_subsi3 (dest, base, off));
11817 gcc_unreachable ();
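/* Rough shapes of the sequences built above, for orientation only;
   these are 64-bit GNU/Linux sketches and the exact code depends on
   the options checked above:

     global dynamic:  leaq  x@tlsgd(%rip), %rdi
                      call  __tls_get_addr
     initial exec:    movq  x@gottpoff(%rip), %rax
                      movq  %fs:(%rax), ...
     local exec:      movq  %fs:0, %rax
                      leaq  x@tpoff(%rax), ...  */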
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to DECL.  */
11826 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11827 htab_t dllimport_map;
11830 get_dllimport_decl (tree decl)
11832 struct tree_map *h, in;
11835 const char *prefix;
11836 size_t namelen, prefixlen;
11841 if (!dllimport_map)
11842 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11844 in.hash = htab_hash_pointer (decl);
11845 in.base.from = decl;
11846 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11847 h = (struct tree_map *) *loc;
11851 *loc = h = ggc_alloc_tree_map ();
11853 h->base.from = decl;
11854 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11855 VAR_DECL, NULL, ptr_type_node);
11856 DECL_ARTIFICIAL (to) = 1;
11857 DECL_IGNORED_P (to) = 1;
11858 DECL_EXTERNAL (to) = 1;
11859 TREE_READONLY (to) = 1;
11861 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11862 name = targetm.strip_name_encoding (name);
11863 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11864 ? "*__imp_" : "*__imp__";
11865 namelen = strlen (name);
11866 prefixlen = strlen (prefix);
11867 imp_name = (char *) alloca (namelen + prefixlen + 1);
11868 memcpy (imp_name, prefix, prefixlen);
11869 memcpy (imp_name + prefixlen, name, namelen + 1);
11871 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11872 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11873 SET_SYMBOL_REF_DECL (rtl, to);
11874 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11876 rtl = gen_const_mem (Pmode, rtl);
11877 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11879 SET_DECL_RTL (to, rtl);
11880 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
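/* A worked (hypothetical) example: for a dllimported symbol "bar" on
   a target whose user_label_prefix is "_", the prefix chosen above is
   "*__imp__", yielding the import-table symbol __imp__bar; with an
   empty user_label_prefix the result would be __imp_bar.  The leading
   '*' stops further prefixing when the name is assembled.  */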
11885 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11886 true if we require the result be a register. */
11889 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11894 gcc_assert (SYMBOL_REF_DECL (symbol));
11895 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11897 x = DECL_RTL (imp_decl);
11899 x = force_reg (Pmode, x);
11903 /* Try machine-dependent ways of modifying an illegitimate address
11904 to be legitimate. If we find one, return the new, valid address.
11905 This macro is used in only one place: `memory_address' in explow.c.
11907 OLDX is the address as it was before break_out_memory_refs was called.
11908 In some cases it is useful to look at this to decide what needs to be done.
11910 It is always safe for this macro to do nothing. It exists to recognize
11911 opportunities to optimize the output.
11913 For the 80386, we handle X+REG by loading X into a register R and
11914 using R+REG. R will go in a general reg and indexing will be used.
11915 However, if REG is a broken-out memory address or multiplication,
11916 nothing needs to be done because REG can certainly go in a general reg.
11918 When -fpic is used, special handling is needed for symbolic references.
11919 See comments by legitimize_pic_address in i386.c for details. */
11922 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11923 enum machine_mode mode)
11928 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11930 return legitimize_tls_address (x, (enum tls_model) log, false);
11931 if (GET_CODE (x) == CONST
11932 && GET_CODE (XEXP (x, 0)) == PLUS
11933 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11934 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11936 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11937 (enum tls_model) log, false);
11938 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11941 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11943 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11944 return legitimize_dllimport_symbol (x, true);
11945 if (GET_CODE (x) == CONST
11946 && GET_CODE (XEXP (x, 0)) == PLUS
11947 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11948 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11950 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11951 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11955 if (flag_pic && SYMBOLIC_CONST (x))
11956 return legitimize_pic_address (x, 0);
/* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
11959 if (GET_CODE (x) == ASHIFT
11960 && CONST_INT_P (XEXP (x, 1))
11961 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11964 log = INTVAL (XEXP (x, 1));
11965 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11966 GEN_INT (1 << log));
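/* E.g. (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), which ix86_decompose_address then
   accepts as a scale-8 index.  */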
11969 if (GET_CODE (x) == PLUS)
11971 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11973 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11974 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11975 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11978 log = INTVAL (XEXP (XEXP (x, 0), 1));
11979 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11980 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11981 GEN_INT (1 << log));
11984 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11985 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11986 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11989 log = INTVAL (XEXP (XEXP (x, 1), 1));
11990 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11991 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11992 GEN_INT (1 << log));
11995 /* Put multiply first if it isn't already. */
11996 if (GET_CODE (XEXP (x, 1)) == MULT)
11998 rtx tmp = XEXP (x, 0);
11999 XEXP (x, 0) = XEXP (x, 1);
12004 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12005 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12006 created by virtual register instantiation, register elimination, and
12007 similar optimizations. */
12008 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12011 x = gen_rtx_PLUS (Pmode,
12012 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12013 XEXP (XEXP (x, 1), 0)),
12014 XEXP (XEXP (x, 1), 1));
/* Canonicalize
   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
   into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
12020 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12021 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12022 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12023 && CONSTANT_P (XEXP (x, 1)))
12026 rtx other = NULL_RTX;
12028 if (CONST_INT_P (XEXP (x, 1)))
12030 constant = XEXP (x, 1);
12031 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12033 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12035 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12036 other = XEXP (x, 1);
12044 x = gen_rtx_PLUS (Pmode,
12045 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12046 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12047 plus_constant (other, INTVAL (constant)));
12051 if (changed && ix86_legitimate_address_p (mode, x, false))
12054 if (GET_CODE (XEXP (x, 0)) == MULT)
12057 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12060 if (GET_CODE (XEXP (x, 1)) == MULT)
12063 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12067 && REG_P (XEXP (x, 1))
12068 && REG_P (XEXP (x, 0)))
12071 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12074 x = legitimize_pic_address (x, 0);
12077 if (changed && ix86_legitimate_address_p (mode, x, false))
12080 if (REG_P (XEXP (x, 0)))
12082 rtx temp = gen_reg_rtx (Pmode);
12083 rtx val = force_operand (XEXP (x, 1), temp);
12085 emit_move_insn (temp, val);
12087 XEXP (x, 1) = temp;
12091 else if (REG_P (XEXP (x, 1)))
12093 rtx temp = gen_reg_rtx (Pmode);
12094 rtx val = force_operand (XEXP (x, 0), temp);
12096 emit_move_insn (temp, val);
12098 XEXP (x, 0) = temp;
12106 /* Print an integer constant expression in assembler syntax. Addition
12107 and subtraction are the only arithmetic that may appear in these
12108 expressions. FILE is the stdio stream to write to, X is the rtx, and
12109 CODE is the operand print code from the output string. */
12112 output_pic_addr_const (FILE *file, rtx x, int code)
12116 switch (GET_CODE (x))
12119 gcc_assert (flag_pic);
12124 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12125 output_addr_const (file, x);
12128 const char *name = XSTR (x, 0);
12130 /* Mark the decl as referenced so that cgraph will
12131 output the function. */
12132 if (SYMBOL_REF_DECL (x))
12133 mark_decl_referenced (SYMBOL_REF_DECL (x));
12136 if (MACHOPIC_INDIRECT
12137 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12138 name = machopic_indirection_name (x, /*stub_p=*/true);
12140 assemble_name (file, name);
12142 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12143 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12144 fputs ("@PLT", file);
12151 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12152 assemble_name (asm_out_file, buf);
12156 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12160 /* This used to output parentheses around the expression,
12161 but that does not work on the 386 (either ATT or BSD assembler). */
12162 output_pic_addr_const (file, XEXP (x, 0), code);
12166 if (GET_MODE (x) == VOIDmode)
12168 /* We can use %d if the number is <32 bits and positive. */
12169 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12170 fprintf (file, "0x%lx%08lx",
12171 (unsigned long) CONST_DOUBLE_HIGH (x),
12172 (unsigned long) CONST_DOUBLE_LOW (x));
12174 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12177 /* We can't handle floating point constants;
12178 TARGET_PRINT_OPERAND must handle them. */
12179 output_operand_lossage ("floating constant misused");
12183 /* Some assemblers need integer constants to appear first. */
12184 if (CONST_INT_P (XEXP (x, 0)))
12186 output_pic_addr_const (file, XEXP (x, 0), code);
12188 output_pic_addr_const (file, XEXP (x, 1), code);
12192 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12193 output_pic_addr_const (file, XEXP (x, 1), code);
12195 output_pic_addr_const (file, XEXP (x, 0), code);
12201 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12202 output_pic_addr_const (file, XEXP (x, 0), code);
12204 output_pic_addr_const (file, XEXP (x, 1), code);
12206 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12210 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12212 bool f = i386_asm_output_addr_const_extra (file, x);
12217 gcc_assert (XVECLEN (x, 0) == 1);
12218 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12219 switch (XINT (x, 1))
12222 fputs ("@GOT", file);
12224 case UNSPEC_GOTOFF:
12225 fputs ("@GOTOFF", file);
12227 case UNSPEC_PLTOFF:
12228 fputs ("@PLTOFF", file);
12230 case UNSPEC_GOTPCREL:
12231 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12232 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12234 case UNSPEC_GOTTPOFF:
12235 /* FIXME: This might be @TPOFF in Sun ld too. */
12236 fputs ("@gottpoff", file);
12239 fputs ("@tpoff", file);
12241 case UNSPEC_NTPOFF:
12243 fputs ("@tpoff", file);
12245 fputs ("@ntpoff", file);
12247 case UNSPEC_DTPOFF:
12248 fputs ("@dtpoff", file);
12250 case UNSPEC_GOTNTPOFF:
12252 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12253 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12255 fputs ("@gotntpoff", file);
12257 case UNSPEC_INDNTPOFF:
12258 fputs ("@indntpoff", file);
12261 case UNSPEC_MACHOPIC_OFFSET:
12263 machopic_output_function_base_name (file);
12267 output_operand_lossage ("invalid UNSPEC as operand");
12273 output_operand_lossage ("invalid expression as operand");
12277 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12278 We need to emit DTP-relative relocations. */
12280 static void ATTRIBUTE_UNUSED
12281 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12283 fputs (ASM_LONG, file);
12284 output_addr_const (file, x);
12285 fputs ("@dtpoff", file);
12291 fputs (", 0", file);
12294 gcc_unreachable ();
12298 /* Return true if X is a representation of the PIC register. This copes
12299 with calls from ix86_find_base_term, where the register might have
12300 been replaced by a cselib value. */
12303 ix86_pic_register_p (rtx x)
12305 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12306 return (pic_offset_table_rtx
12307 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12309 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12312 /* Helper function for ix86_delegitimize_address.
12313 Attempt to delegitimize TLS local-exec accesses. */
12316 ix86_delegitimize_tls_address (rtx orig_x)
12318 rtx x = orig_x, unspec;
12319 struct ix86_address addr;
12321 if (!TARGET_TLS_DIRECT_SEG_REFS)
12325 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12327 if (ix86_decompose_address (x, &addr) == 0
12328 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12329 || addr.disp == NULL_RTX
12330 || GET_CODE (addr.disp) != CONST)
12332 unspec = XEXP (addr.disp, 0);
12333 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12334 unspec = XEXP (unspec, 0);
12335 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12337 x = XVECEXP (unspec, 0, 0);
12338 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12339 if (unspec != XEXP (addr.disp, 0))
12340 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12343 rtx idx = addr.index;
12344 if (addr.scale != 1)
12345 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12346 x = gen_rtx_PLUS (Pmode, idx, x);
12349 x = gen_rtx_PLUS (Pmode, addr.base, x);
12350 if (MEM_P (orig_x))
12351 x = replace_equiv_address_nv (orig_x, x);
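/* Illustration (not from the original source): a 32-bit local-exec TLS
   load such as

       movl %gs:x@ntpoff, %eax

   reaches this point as a SEG_GS address whose displacement wraps an
   UNSPEC_NTPOFF; the code above strips that back down to the bare
   SYMBOL_REF for "x", re-adding any index, scale, base and constant
   offset that surrounded it.  */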
12355 /* In the name of slightly smaller debug output, and to cater to
12356 general assembler lossage, recognize PIC+GOTOFF and turn it back
12357 into a direct symbol reference.
12359 On Darwin, this is necessary to avoid a crash, because Darwin
12360 has a different PIC label for each routine but the DWARF debugging
12361 information is not associated with any particular routine, so it's
12362 necessary to remove references to the PIC label from RTL stored by
12363 the DWARF output code. */
12366 ix86_delegitimize_address (rtx x)
12368 rtx orig_x = delegitimize_mem_from_attrs (x);
12369 /* addend is NULL or some rtx if x is something+GOTOFF where
12370 something doesn't include the PIC register. */
12371 rtx addend = NULL_RTX;
12372 /* reg_addend is NULL or a multiple of some register. */
12373 rtx reg_addend = NULL_RTX;
12374 /* const_addend is NULL or a const_int. */
12375 rtx const_addend = NULL_RTX;
12376 /* This is the result, or NULL. */
12377 rtx result = NULL_RTX;
12386 if (GET_CODE (x) != CONST
12387 || GET_CODE (XEXP (x, 0)) != UNSPEC
12388 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12389 || !MEM_P (orig_x))
12390 return ix86_delegitimize_tls_address (orig_x);
12391 x = XVECEXP (XEXP (x, 0), 0, 0);
12392 if (GET_MODE (orig_x) != Pmode)
12393 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12397 if (GET_CODE (x) != PLUS
12398 || GET_CODE (XEXP (x, 1)) != CONST)
12399 return ix86_delegitimize_tls_address (orig_x);
12401 if (ix86_pic_register_p (XEXP (x, 0)))
12402 /* %ebx + GOT/GOTOFF */
12404 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12406 /* %ebx + %reg * scale + GOT/GOTOFF */
12407 reg_addend = XEXP (x, 0);
12408 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12409 reg_addend = XEXP (reg_addend, 1);
12410 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12411 reg_addend = XEXP (reg_addend, 0);
12414 reg_addend = NULL_RTX;
12415 addend = XEXP (x, 0);
12419 addend = XEXP (x, 0);
12421 x = XEXP (XEXP (x, 1), 0);
12422 if (GET_CODE (x) == PLUS
12423 && CONST_INT_P (XEXP (x, 1)))
12425 const_addend = XEXP (x, 1);
12429 if (GET_CODE (x) == UNSPEC
12430 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12431 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12432 result = XVECEXP (x, 0, 0);
12434 if (TARGET_MACHO && darwin_local_data_pic (x)
12435 && !MEM_P (orig_x))
12436 result = XVECEXP (x, 0, 0);
12439 return ix86_delegitimize_tls_address (orig_x);
12442 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12444 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12447 /* If the rest of original X doesn't involve the PIC register, add
12448 addend and subtract pic_offset_table_rtx. This can happen e.g.
12450 leal (%ebx, %ecx, 4), %ecx
12452 movl foo@GOTOFF(%ecx), %edx
12453 in which case we return (%ecx - %ebx) + foo. */
12454 if (pic_offset_table_rtx)
12455 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12456 pic_offset_table_rtx),
12461 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12462 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12466 /* If X is a machine specific address (i.e. a symbol or label being
12467 referenced as a displacement from the GOT implemented using an
12468 UNSPEC), then return the base term. Otherwise return X. */
12471 ix86_find_base_term (rtx x)
12477 if (GET_CODE (x) != CONST)
12479 term = XEXP (x, 0);
12480 if (GET_CODE (term) == PLUS
12481 && (CONST_INT_P (XEXP (term, 1))
12482 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12483 term = XEXP (term, 0);
12484 if (GET_CODE (term) != UNSPEC
12485 || XINT (term, 1) != UNSPEC_GOTPCREL)
12488 return XVECEXP (term, 0, 0);
12491 return ix86_delegitimize_address (x);
12495 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12496 int fp, FILE *file)
12498 const char *suffix;
12500 if (mode == CCFPmode || mode == CCFPUmode)
12502 code = ix86_fp_compare_code_to_integer (code);
12506 code = reverse_condition (code);
12557 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12561 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12562 Those same assemblers have the same but opposite lossage on cmov. */
12563 if (mode == CCmode)
12564 suffix = fp ? "nbe" : "a";
12565 else if (mode == CCCmode)
12568 gcc_unreachable ();
12584 gcc_unreachable ();
12588 gcc_assert (mode == CCmode || mode == CCCmode);
12605 gcc_unreachable ();
12609 /* ??? As above. */
12610 gcc_assert (mode == CCmode || mode == CCCmode);
12611 suffix = fp ? "nb" : "ae";
12614 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12618 /* ??? As above. */
12619 if (mode == CCmode)
12621 else if (mode == CCCmode)
12622 suffix = fp ? "nb" : "ae";
12624 gcc_unreachable ();
12627 suffix = fp ? "u" : "p";
12630 suffix = fp ? "nu" : "np";
12633 gcc_unreachable ();
12635 fputs (suffix, file);
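/* Illustration (not from the original source): this routine is what
   makes a template like "set%C1 %b0" expand to, e.g., "seta %al" --
   for an unsigned greater-than comparison in CCmode the code above
   picks the "a" (above) suffix, and the 'b' code prints the QImode
   register name.  */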
12638 /* Print the name of register X to FILE based on its machine mode and number.
12639 If CODE is 'w', pretend the mode is HImode.
12640 If CODE is 'b', pretend the mode is QImode.
12641 If CODE is 'k', pretend the mode is SImode.
12642 If CODE is 'q', pretend the mode is DImode.
12643 If CODE is 'x', pretend the mode is V4SFmode.
12644 If CODE is 't', pretend the mode is V8SFmode.
12645 If CODE is 'h', pretend the reg is the 'high' byte register.
12646 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
12647 If CODE is 'd', duplicate the operand for an AVX instruction.
12651 print_reg (rtx x, int code, FILE *file)
12654 bool duplicated = code == 'd' && TARGET_AVX;
12656 gcc_assert (x == pc_rtx
12657 || (REGNO (x) != ARG_POINTER_REGNUM
12658 && REGNO (x) != FRAME_POINTER_REGNUM
12659 && REGNO (x) != FLAGS_REG
12660 && REGNO (x) != FPSR_REG
12661 && REGNO (x) != FPCR_REG));
12663 if (ASSEMBLER_DIALECT == ASM_ATT)
12668 gcc_assert (TARGET_64BIT);
12669 fputs ("rip", file);
12673 if (code == 'w' || MMX_REG_P (x))
12675 else if (code == 'b')
12677 else if (code == 'k')
12679 else if (code == 'q')
12681 else if (code == 'y')
12683 else if (code == 'h')
12685 else if (code == 'x')
12687 else if (code == 't')
12690 code = GET_MODE_SIZE (GET_MODE (x));
12692 /* Irritatingly, AMD extended registers use a different naming convention
12693 from the normal registers. */
12694 if (REX_INT_REG_P (x))
12696 gcc_assert (TARGET_64BIT);
12700 error ("extended registers have no high halves");
12703 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12706 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12709 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12712 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12715 error ("unsupported operand size for extended register");
12725 if (STACK_TOP_P (x))
12734 if (! ANY_FP_REG_P (x))
12735 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12740 reg = hi_reg_name[REGNO (x)];
12743 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12745 reg = qi_reg_name[REGNO (x)];
12748 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12750 reg = qi_high_reg_name[REGNO (x)];
12755 gcc_assert (!duplicated);
12757 fputs (hi_reg_name[REGNO (x)] + 1, file);
12762 gcc_unreachable ();
12768 if (ASSEMBLER_DIALECT == ASM_ATT)
12769 fprintf (file, ", %%%s", reg);
12771 fprintf (file, ", %s", reg);
12775 /* Locate some local-dynamic symbol still in use by this function
12776 so that we can print its name in some tls_local_dynamic_base pattern. */
12780 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12784 if (GET_CODE (x) == SYMBOL_REF
12785 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12787 cfun->machine->some_ld_name = XSTR (x, 0);
12794 static const char *
12795 get_some_local_dynamic_name (void)
12799 if (cfun->machine->some_ld_name)
12800 return cfun->machine->some_ld_name;
12802 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12803 if (NONDEBUG_INSN_P (insn)
12804 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12805 return cfun->machine->some_ld_name;
12810 /* Meaning of CODE:
12811 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12812 C -- print opcode suffix for set/cmov insn.
12813 c -- like C, but print reversed condition
12814 F,f -- likewise, but for floating-point.
12815 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing.
12817 R -- print the prefix for register names.
12818 z -- print the opcode suffix for the size of the current operand.
12819 Z -- likewise, with special suffixes for x87 instructions.
12820 * -- print a star (in certain assembler syntax)
12821 A -- print an absolute memory reference.
12822 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12823 s -- print a shift double count, followed by the assembler's argument delimiter.
12825 b -- print the QImode name of the register for the indicated operand.
12826 %b0 would print %al if operands[0] is reg 0.
12827 w -- likewise, print the HImode name of the register.
12828 k -- likewise, print the SImode name of the register.
12829 q -- likewise, print the DImode name of the register.
12830 x -- likewise, print the V4SFmode name of the register.
12831 t -- likewise, print the V8SFmode name of the register.
12832 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12833 y -- print "st(0)" instead of "st" as a register.
12834 d -- print duplicated register operand for AVX instruction.
12835 D -- print condition for SSE cmp instruction.
12836 P -- if PIC, print an @PLT suffix.
12837 X -- don't print any sort of PIC '@' suffix for a symbol.
12838 & -- print some in-use local-dynamic symbol name.
12839 H -- print a memory address offset by 8; used for sse high-parts
12840 Y -- print condition for XOP pcom* instruction.
12841 + -- print a branch hint as 'cs' or 'ds' prefix
12842 ; -- print a semicolon (after prefixes due to a bug in older gas).
12843 @ -- print the segment register of a thread base pointer load
12847 ix86_print_operand (FILE *file, rtx x, int code)
12854 if (ASSEMBLER_DIALECT == ASM_ATT)
12860 const char *name = get_some_local_dynamic_name ();
12862 output_operand_lossage ("'%%&' used without any "
12863 "local dynamic TLS references");
12865 assemble_name (file, name);
12870 switch (ASSEMBLER_DIALECT)
12877 /* Intel syntax. For absolute addresses, registers should not
12878 be surrounded by braces. */
12882 ix86_print_operand (file, x, 0);
12889 gcc_unreachable ();
12892 ix86_print_operand (file, x, 0);
12897 if (ASSEMBLER_DIALECT == ASM_ATT)
12902 if (ASSEMBLER_DIALECT == ASM_ATT)
12907 if (ASSEMBLER_DIALECT == ASM_ATT)
12912 if (ASSEMBLER_DIALECT == ASM_ATT)
12917 if (ASSEMBLER_DIALECT == ASM_ATT)
12922 if (ASSEMBLER_DIALECT == ASM_ATT)
12927 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12929 /* Opcodes don't get size suffixes if using Intel opcodes. */
12930 if (ASSEMBLER_DIALECT == ASM_INTEL)
12933 switch (GET_MODE_SIZE (GET_MODE (x)))
12952 output_operand_lossage
12953 ("invalid operand size for operand code '%c'", code);
12958 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12960 (0, "non-integer operand used with operand code '%c'", code);
12964 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12965 if (ASSEMBLER_DIALECT == ASM_INTEL)
12968 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12970 switch (GET_MODE_SIZE (GET_MODE (x)))
12973 #ifdef HAVE_AS_IX86_FILDS
12983 #ifdef HAVE_AS_IX86_FILDQ
12986 fputs ("ll", file);
12994 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12996 /* 387 opcodes don't get size suffixes
12997 if the operands are registers. */
12998 if (STACK_REG_P (x))
13001 switch (GET_MODE_SIZE (GET_MODE (x)))
13022 output_operand_lossage
13023 ("invalid operand type used with operand code '%c'", code);
13027 output_operand_lossage
13028 ("invalid operand size for operand code '%c'", code);
13045 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13047 ix86_print_operand (file, x, 0);
13048 fputs (", ", file);
13053 /* Little bit of braindamage here. The SSE compare instructions
13054 use completely different names for the comparisons than the
13055 fp conditional moves do. */
13058 switch (GET_CODE (x))
13061 fputs ("eq", file);
13064 fputs ("eq_us", file);
13067 fputs ("lt", file);
13070 fputs ("nge", file);
13073 fputs ("le", file);
13076 fputs ("ngt", file);
13079 fputs ("unord", file);
13082 fputs ("neq", file);
13085 fputs ("neq_oq", file);
13088 fputs ("ge", file);
13091 fputs ("nlt", file);
13094 fputs ("gt", file);
13097 fputs ("nle", file);
13100 fputs ("ord", file);
13103 output_operand_lossage ("operand is not a condition code, "
13104 "invalid operand code 'D'");
13110 switch (GET_CODE (x))
13114 fputs ("eq", file);
13118 fputs ("lt", file);
13122 fputs ("le", file);
13125 fputs ("unord", file);
13129 fputs ("neq", file);
13133 fputs ("nlt", file);
13137 fputs ("nle", file);
13140 fputs ("ord", file);
13143 output_operand_lossage ("operand is not a condition code, "
13144 "invalid operand code 'D'");
13150 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13151 if (ASSEMBLER_DIALECT == ASM_ATT)
13153 switch (GET_MODE (x))
13155 case HImode: putc ('w', file); break;
13157 case SFmode: putc ('l', file); break;
13159 case DFmode: putc ('q', file); break;
13160 default: gcc_unreachable ();
13167 if (!COMPARISON_P (x))
13169 output_operand_lossage ("operand is neither a constant nor a "
13170 "condition code, invalid operand code "
13174 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13177 if (!COMPARISON_P (x))
13179 output_operand_lossage ("operand is neither a constant nor a "
13180 "condition code, invalid operand code "
13184 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13185 if (ASSEMBLER_DIALECT == ASM_ATT)
13188 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13191 /* Like above, but reverse condition */
13193 /* Check to see if argument to %c is really a constant
13194 and not a condition code which needs to be reversed. */
13195 if (!COMPARISON_P (x))
13197 output_operand_lossage ("operand is neither a constant nor a "
13198 "condition code, invalid operand "
13202 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13205 if (!COMPARISON_P (x))
13207 output_operand_lossage ("operand is neither a constant nor a "
13208 "condition code, invalid operand "
13212 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13213 if (ASSEMBLER_DIALECT == ASM_ATT)
13216 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13220 /* It doesn't actually matter what mode we use here, as we're
13221 only going to use this for printing. */
13222 x = adjust_address_nv (x, DImode, 8);
13230 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13233 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13236 int pred_val = INTVAL (XEXP (x, 0));
13238 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13239 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13241 int taken = pred_val > REG_BR_PROB_BASE / 2;
13242 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13244 /* Emit hints only in the case where the default branch prediction
13245 heuristics would fail. */
13246 if (taken != cputaken)
13248 /* We use 3e (DS) prefix for taken branches and
13249 2e (CS) prefix for not taken branches. */
13251 fputs ("ds ; ", file);
13253 fputs ("cs ; ", file);
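/* Illustration (not from the original source): when the profile says a
   branch is taken but the static forward-not-taken heuristic disagrees,
   the output resembles

       ds ; jne .L3

   the 0x3e (DS) and 0x2e (CS) segment-override bytes doubling as
   taken/not-taken hints on processors that honor them (notably the
   Pentium 4).  */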
13261 switch (GET_CODE (x))
13264 fputs ("neq", file);
13267 fputs ("eq", file);
13271 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13275 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13279 fputs ("le", file);
13283 fputs ("lt", file);
13286 fputs ("unord", file);
13289 fputs ("ord", file);
13292 fputs ("ueq", file);
13295 fputs ("nlt", file);
13298 fputs ("nle", file);
13301 fputs ("ule", file);
13304 fputs ("ult", file);
13307 fputs ("une", file);
13310 output_operand_lossage ("operand is not a condition code, "
13311 "invalid operand code 'Y'");
13317 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13323 if (ASSEMBLER_DIALECT == ASM_ATT)
13326 /* The kernel uses a different segment register for performance
13327 reasons; a system call would not have to trash the userspace
13328 segment register, which would be expensive. */
13329 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13330 fputs ("fs", file);
13332 fputs ("gs", file);
13336 output_operand_lossage ("invalid operand code '%c'", code);
13341 print_reg (x, code, file);
13343 else if (MEM_P (x))
13345 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13346 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13347 && GET_MODE (x) != BLKmode)
13350 switch (GET_MODE_SIZE (GET_MODE (x)))
13352 case 1: size = "BYTE"; break;
13353 case 2: size = "WORD"; break;
13354 case 4: size = "DWORD"; break;
13355 case 8: size = "QWORD"; break;
13356 case 12: size = "TBYTE"; break;
13358 if (GET_MODE (x) == XFmode)
13363 case 32: size = "YMMWORD"; break;
13365 gcc_unreachable ();
13368 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13371 else if (code == 'w')
13373 else if (code == 'k')
13376 fputs (size, file);
13377 fputs (" PTR ", file);
13381 /* Avoid (%rip) for call operands. */
13382 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13383 && !CONST_INT_P (x))
13384 output_addr_const (file, x);
13385 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13386 output_operand_lossage ("invalid constraints for operand");
13388 output_address (x);
13391 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13396 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13397 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13399 if (ASSEMBLER_DIALECT == ASM_ATT)
13401 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13403 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13405 fprintf (file, "0x%08x", (unsigned int) l);
13408 /* These float cases don't actually occur as immediate operands. */
13409 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13413 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13414 fputs (dstr, file);
13417 else if (GET_CODE (x) == CONST_DOUBLE
13418 && GET_MODE (x) == XFmode)
13422 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13423 fputs (dstr, file);
13428 /* We have patterns that allow zero sets of memory, for instance.
13429 In 64-bit mode, we should probably support all 8-byte vectors,
13430 since we can in fact encode that into an immediate. */
13431 if (GET_CODE (x) == CONST_VECTOR)
13433 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13439 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13441 if (ASSEMBLER_DIALECT == ASM_ATT)
13444 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13445 || GET_CODE (x) == LABEL_REF)
13447 if (ASSEMBLER_DIALECT == ASM_ATT)
13450 fputs ("OFFSET FLAT:", file);
13453 if (CONST_INT_P (x))
13454 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13456 output_pic_addr_const (file, x, code);
13458 output_addr_const (file, x);
13463 ix86_print_operand_punct_valid_p (unsigned char code)
13465 return (code == '@' || code == '*' || code == '+'
13466 || code == '&' || code == ';');
13469 /* Print a memory operand whose address is ADDR. */
13472 ix86_print_operand_address (FILE *file, rtx addr)
13474 struct ix86_address parts;
13475 rtx base, index, disp;
13477 int ok = ix86_decompose_address (addr, &parts);
13482 index = parts.index;
13484 scale = parts.scale;
13492 if (ASSEMBLER_DIALECT == ASM_ATT)
13494 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13497 gcc_unreachable ();
13500 /* Use the one byte shorter RIP-relative addressing in 64-bit mode. */
13501 if (TARGET_64BIT && !base && !index)
13505 if (GET_CODE (disp) == CONST
13506 && GET_CODE (XEXP (disp, 0)) == PLUS
13507 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13508 symbol = XEXP (XEXP (disp, 0), 0);
13510 if (GET_CODE (symbol) == LABEL_REF
13511 || (GET_CODE (symbol) == SYMBOL_REF
13512 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13515 if (!base && !index)
13517 /* Displacement only requires special attention. */
13519 if (CONST_INT_P (disp))
13521 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13522 fputs ("ds:", file);
13523 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13526 output_pic_addr_const (file, disp, 0);
13528 output_addr_const (file, disp);
13532 if (ASSEMBLER_DIALECT == ASM_ATT)
13537 output_pic_addr_const (file, disp, 0);
13538 else if (GET_CODE (disp) == LABEL_REF)
13539 output_asm_label (disp);
13541 output_addr_const (file, disp);
13546 print_reg (base, 0, file);
13550 print_reg (index, 0, file);
13552 fprintf (file, ",%d", scale);
13558 rtx offset = NULL_RTX;
13562 /* Pull out the offset of a symbol; print any symbol itself. */
13563 if (GET_CODE (disp) == CONST
13564 && GET_CODE (XEXP (disp, 0)) == PLUS
13565 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13567 offset = XEXP (XEXP (disp, 0), 1);
13568 disp = gen_rtx_CONST (VOIDmode,
13569 XEXP (XEXP (disp, 0), 0));
13573 output_pic_addr_const (file, disp, 0);
13574 else if (GET_CODE (disp) == LABEL_REF)
13575 output_asm_label (disp);
13576 else if (CONST_INT_P (disp))
13579 output_addr_const (file, disp);
13585 print_reg (base, 0, file);
13588 if (INTVAL (offset) >= 0)
13590 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13594 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13601 print_reg (index, 0, file);
13603 fprintf (file, "*%d", scale);
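/* Illustration (not from the original source): the same address printed
   by the code above in each dialect, assuming base %ebx, index %ecx,
   scale 4 and displacement 16:

       AT&T:    16(%ebx,%ecx,4)
       Intel:   [ebx+ecx*4+16]

   and, for a symbolic displacement with no base or index in 64-bit
   mode, the shorter RIP-relative form "foo(%rip)".  */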
13610 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13613 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13617 if (GET_CODE (x) != UNSPEC)
13620 op = XVECEXP (x, 0, 0);
13621 switch (XINT (x, 1))
13623 case UNSPEC_GOTTPOFF:
13624 output_addr_const (file, op);
13625 /* FIXME: This might be @TPOFF in Sun ld. */
13626 fputs ("@gottpoff", file);
13629 output_addr_const (file, op);
13630 fputs ("@tpoff", file);
13632 case UNSPEC_NTPOFF:
13633 output_addr_const (file, op);
13635 fputs ("@tpoff", file);
13637 fputs ("@ntpoff", file);
13639 case UNSPEC_DTPOFF:
13640 output_addr_const (file, op);
13641 fputs ("@dtpoff", file);
13643 case UNSPEC_GOTNTPOFF:
13644 output_addr_const (file, op);
13646 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13647 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13649 fputs ("@gotntpoff", file);
13651 case UNSPEC_INDNTPOFF:
13652 output_addr_const (file, op);
13653 fputs ("@indntpoff", file);
13656 case UNSPEC_MACHOPIC_OFFSET:
13657 output_addr_const (file, op);
13659 machopic_output_function_base_name (file);
13663 case UNSPEC_STACK_CHECK:
13667 gcc_assert (flag_split_stack);
13669 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13670 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13672 gcc_unreachable ();
13675 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13686 /* Split one or more double-mode RTL references into pairs of half-mode
13687 references. The RTL can be REG, offsettable MEM, integer constant, or
13688 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13689 split and "num" is its length. lo_half and hi_half are output arrays
13690 that parallel "operands". */
13693 split_double_mode (enum machine_mode mode, rtx operands[],
13694 int num, rtx lo_half[], rtx hi_half[])
13696 enum machine_mode half_mode;
13702 half_mode = DImode;
13705 half_mode = SImode;
13708 gcc_unreachable ();
13711 byte = GET_MODE_SIZE (half_mode);
13715 rtx op = operands[num];
13717 /* simplify_subreg refuses to split volatile memory addresses,
13718 but we still have to handle them. */
13721 lo_half[num] = adjust_address (op, half_mode, 0);
13722 hi_half[num] = adjust_address (op, half_mode, byte);
13726 lo_half[num] = simplify_gen_subreg (half_mode, op,
13727 GET_MODE (op) == VOIDmode
13728 ? mode : GET_MODE (op), 0);
13729 hi_half[num] = simplify_gen_subreg (half_mode, op,
13730 GET_MODE (op) == VOIDmode
13731 ? mode : GET_MODE (op), byte);
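/* Illustration (not from the original source): a minimal standalone
   sketch of the lo/hi split that split_double_mode performs, for the
   constant case, assuming a little-endian 64-bit value split into two
   32-bit halves.  */

#include <stdint.h>

static void
split_u64_sketch (uint64_t op, uint32_t *lo_half, uint32_t *hi_half)
{
  *lo_half = (uint32_t) op;         /* byte offset 0: least significant half */
  *hi_half = (uint32_t) (op >> 32); /* byte offset 4: most significant half */
}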
13736 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13737 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13738 is the expression of the binary operation. The output may either be
13739 emitted here, or returned to the caller, like all output_* functions.
13741 There is no guarantee that the operands are the same mode, as they
13742 might be within FLOAT or FLOAT_EXTEND expressions. */
13744 #ifndef SYSV386_COMPAT
13745 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13746 wants to fix the assemblers because that causes incompatibility
13747 with gcc. No-one wants to fix gcc because that causes
13748 incompatibility with assemblers... You can use the option of
13749 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13750 #define SYSV386_COMPAT 1
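/* Illustration (not from the original source): reading the template
   "{p\t%0, %2|rp\t%2, %0}" used below with operands st(0) and st(1),
   the AT&T side emits "fsubp %st, %st(1)" while the Intel side emits
   "fsubrp st(1), st" -- the r/no-r swap compensating for the reversed
   fsub/fdiv direction that SYSV386_COMPAT assemblers implement.  */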
13754 output_387_binary_op (rtx insn, rtx *operands)
13756 static char buf[40];
13759 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13761 #ifdef ENABLE_CHECKING
13762 /* Even if we do not want to check the inputs, this documents the input
13763 constraints, which helps in understanding the following code. */
13764 if (STACK_REG_P (operands[0])
13765 && ((REG_P (operands[1])
13766 && REGNO (operands[0]) == REGNO (operands[1])
13767 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13768 || (REG_P (operands[2])
13769 && REGNO (operands[0]) == REGNO (operands[2])
13770 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13771 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13774 gcc_assert (is_sse);
13777 switch (GET_CODE (operands[3]))
13780 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13781 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13789 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13790 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13798 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13799 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13807 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13808 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13816 gcc_unreachable ();
13823 strcpy (buf, ssep);
13824 if (GET_MODE (operands[0]) == SFmode)
13825 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13827 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13831 strcpy (buf, ssep + 1);
13832 if (GET_MODE (operands[0]) == SFmode)
13833 strcat (buf, "ss\t{%2, %0|%0, %2}");
13835 strcat (buf, "sd\t{%2, %0|%0, %2}");
13841 switch (GET_CODE (operands[3]))
13845 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13847 rtx temp = operands[2];
13848 operands[2] = operands[1];
13849 operands[1] = temp;
13852 /* We know operands[0] == operands[1]. */
13854 if (MEM_P (operands[2]))
13860 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13862 if (STACK_TOP_P (operands[0]))
13863 /* How is it that we are storing to a dead operand[2]?
13864 Well, presumably operands[1] is dead too. We can't
13865 store the result to st(0) as st(0) gets popped on this
13866 instruction. Instead store to operands[2] (which I
13867 think has to be st(1)). st(1) will be popped later.
13868 gcc <= 2.8.1 didn't have this check and generated
13869 assembly code that the Unixware assembler rejected. */
13870 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13872 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13876 if (STACK_TOP_P (operands[0]))
13877 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13879 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13884 if (MEM_P (operands[1]))
13890 if (MEM_P (operands[2]))
13896 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13899 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13900 derived assemblers, confusingly reverse the direction of
13901 the operation for fsub{r} and fdiv{r} when the
13902 destination register is not st(0). The Intel assembler
13903 doesn't have this brain damage. Read !SYSV386_COMPAT to
13904 figure out what the hardware really does. */
13905 if (STACK_TOP_P (operands[0]))
13906 p = "{p\t%0, %2|rp\t%2, %0}";
13908 p = "{rp\t%2, %0|p\t%0, %2}";
13910 if (STACK_TOP_P (operands[0]))
13911 /* As above for fmul/fadd, we can't store to st(0). */
13912 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13914 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13919 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13922 if (STACK_TOP_P (operands[0]))
13923 p = "{rp\t%0, %1|p\t%1, %0}";
13925 p = "{p\t%1, %0|rp\t%0, %1}";
13927 if (STACK_TOP_P (operands[0]))
13928 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13930 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13935 if (STACK_TOP_P (operands[0]))
13937 if (STACK_TOP_P (operands[1]))
13938 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13940 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13943 else if (STACK_TOP_P (operands[1]))
13946 p = "{\t%1, %0|r\t%0, %1}";
13948 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13954 p = "{r\t%2, %0|\t%0, %2}";
13956 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13962 gcc_unreachable ();
13969 /* Return needed mode for entity in optimize_mode_switching pass. */
13972 ix86_mode_needed (int entity, rtx insn)
13974 enum attr_i387_cw mode;
13976 /* The mode UNINITIALIZED is used to store the control word after a
13977 function call or ASM pattern. The mode ANY specifies that the function
13978 has no requirements on the control word and makes no changes in the
13979 bits we are interested in. */
13982 || (NONJUMP_INSN_P (insn)
13983 && (asm_noperands (PATTERN (insn)) >= 0
13984 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13985 return I387_CW_UNINITIALIZED;
13987 if (recog_memoized (insn) < 0)
13988 return I387_CW_ANY;
13990 mode = get_attr_i387_cw (insn);
13995 if (mode == I387_CW_TRUNC)
14000 if (mode == I387_CW_FLOOR)
14005 if (mode == I387_CW_CEIL)
14010 if (mode == I387_CW_MASK_PM)
14015 gcc_unreachable ();
14018 return I387_CW_ANY;
14021 /* Output code to initialize control word copies used by trunc?f?i and
14022 rounding patterns. CURRENT_MODE is set to the current control word,
14023 while NEW_MODE is set to the new control word. */
14026 emit_i387_cw_initialization (int mode)
14028 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14031 enum ix86_stack_slot slot;
14033 rtx reg = gen_reg_rtx (HImode);
14035 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14036 emit_move_insn (reg, copy_rtx (stored_mode));
14038 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14039 || optimize_function_for_size_p (cfun))
14043 case I387_CW_TRUNC:
14044 /* round toward zero (truncate) */
14045 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14046 slot = SLOT_CW_TRUNC;
14049 case I387_CW_FLOOR:
14050 /* round down toward -oo */
14051 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14052 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14053 slot = SLOT_CW_FLOOR;
14057 /* round up toward +oo */
14058 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14059 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14060 slot = SLOT_CW_CEIL;
14063 case I387_CW_MASK_PM:
14064 /* mask precision exception for nearbyint() */
14065 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14066 slot = SLOT_CW_MASK_PM;
14070 gcc_unreachable ();
14077 case I387_CW_TRUNC:
14078 /* round toward zero (truncate) */
14079 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14080 slot = SLOT_CW_TRUNC;
14083 case I387_CW_FLOOR:
14084 /* round down toward -oo */
14085 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14086 slot = SLOT_CW_FLOOR;
14090 /* round up toward +oo */
14091 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14092 slot = SLOT_CW_CEIL;
14095 case I387_CW_MASK_PM:
14096 /* mask precision exception for nearbyint() */
14097 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14098 slot = SLOT_CW_MASK_PM;
14102 gcc_unreachable ();
14106 gcc_assert (slot < MAX_386_STACK_LOCALS);
14108 new_mode = assign_386_stack_local (HImode, slot);
14109 emit_move_insn (new_mode, reg);
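/* Illustration (not from the original source): a standalone sketch of
   how the masks used above map onto the x87 control word, where bits
   10-11 hold the rounding-control field and bit 5 masks the precision
   exception.  */

#include <stdint.h>

static uint16_t
x87_cw_sketch (uint16_t cw, int rounding)
{
  switch (rounding)
    {
    case 0:  return cw | 0x0c00;              /* RC = 11: truncate      */
    case 1:  return (cw & ~0x0c00) | 0x0400;  /* RC = 01: round down    */
    case 2:  return (cw & ~0x0c00) | 0x0800;  /* RC = 10: round up      */
    default: return cw | 0x0020;              /* PM = 1: mask precision */
    }
}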
14112 /* Output code for INSN to convert a float to a signed int. OPERANDS
14113 are the insn operands. The output may be [HSD]Imode and the input
14114 operand may be [SDX]Fmode. */
14117 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14119 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14120 int dimode_p = GET_MODE (operands[0]) == DImode;
14121 int round_mode = get_attr_i387_cw (insn);
14123 /* Jump through a hoop or two for DImode, since the hardware has no
14124 non-popping instruction. We used to do this a different way, but
14125 that was somewhat fragile and broke with post-reload splitters. */
14126 if ((dimode_p || fisttp) && !stack_top_dies)
14127 output_asm_insn ("fld\t%y1", operands);
14129 gcc_assert (STACK_TOP_P (operands[1]));
14130 gcc_assert (MEM_P (operands[0]));
14131 gcc_assert (GET_MODE (operands[1]) != TFmode);
14134 output_asm_insn ("fisttp%Z0\t%0", operands);
14137 if (round_mode != I387_CW_ANY)
14138 output_asm_insn ("fldcw\t%3", operands);
14139 if (stack_top_dies || dimode_p)
14140 output_asm_insn ("fistp%Z0\t%0", operands);
14142 output_asm_insn ("fist%Z0\t%0", operands);
14143 if (round_mode != I387_CW_ANY)
14144 output_asm_insn ("fldcw\t%2", operands);
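/* Illustration (not from the original source): without SSE3's fisttp,
   the sequence emitted above for a truncating conversion resembles

       fldcw   %3              # load the truncating control word
       fistp%Z0 %0             # convert and pop
       fldcw   %2              # restore the caller's control word  */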
14150 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14151 have the values zero or one, indicates the ffreep insn's operand
14152 from the OPERANDS array. */
14154 static const char *
14155 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14157 if (TARGET_USE_FFREEP)
14158 #ifdef HAVE_AS_IX86_FFREEP
14159 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14162 static char retval[32];
14163 int regno = REGNO (operands[opno]);
14165 gcc_assert (FP_REGNO_P (regno));
14167 regno -= FIRST_STACK_REG;
14169 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14174 return opno ? "fstp\t%y1" : "fstp\t%y0";
14178 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14179 should be used. UNORDERED_P is true when fucom should be used. */
14182 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14184 int stack_top_dies;
14185 rtx cmp_op0, cmp_op1;
14186 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14190 cmp_op0 = operands[0];
14191 cmp_op1 = operands[1];
14195 cmp_op0 = operands[1];
14196 cmp_op1 = operands[2];
14201 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14202 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14203 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14204 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14206 if (GET_MODE (operands[0]) == SFmode)
14208 return &ucomiss[TARGET_AVX ? 0 : 1];
14210 return &comiss[TARGET_AVX ? 0 : 1];
14213 return &ucomisd[TARGET_AVX ? 0 : 1];
14215 return &comisd[TARGET_AVX ? 0 : 1];
14218 gcc_assert (STACK_TOP_P (cmp_op0));
14220 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14222 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14224 if (stack_top_dies)
14226 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14227 return output_387_ffreep (operands, 1);
14230 return "ftst\n\tfnstsw\t%0";
14233 if (STACK_REG_P (cmp_op1)
14235 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14236 && REGNO (cmp_op1) != FIRST_STACK_REG)
14238 /* If both the top of the 387 stack and the other operand (also a
14239 stack register) die, then this must be a `fcompp' float
14240 compare. */
14244 /* There is no double popping fcomi variant. Fortunately,
14245 eflags is immune from the fstp's cc clobbering. */
14247 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14249 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14250 return output_387_ffreep (operands, 0);
14255 return "fucompp\n\tfnstsw\t%0";
14257 return "fcompp\n\tfnstsw\t%0";
14262 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14264 static const char * const alt[16] =
14266 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14267 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14268 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14269 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14271 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14272 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14276 "fcomi\t{%y1, %0|%0, %y1}",
14277 "fcomip\t{%y1, %0|%0, %y1}",
14278 "fucomi\t{%y1, %0|%0, %y1}",
14279 "fucomip\t{%y1, %0|%0, %y1}",
14290 mask = eflags_p << 3;
14291 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14292 mask |= unordered_p << 1;
14293 mask |= stack_top_dies;
14295 gcc_assert (mask < 16);
14304 ix86_output_addr_vec_elt (FILE *file, int value)
14306 const char *directive = ASM_LONG;
14310 directive = ASM_QUAD;
14312 gcc_assert (!TARGET_64BIT);
14315 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14319 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14321 const char *directive = ASM_LONG;
14324 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14325 directive = ASM_QUAD;
14327 gcc_assert (!TARGET_64BIT);
14329 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14330 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14331 fprintf (file, "%s%s%d-%s%d\n",
14332 directive, LPREFIX, value, LPREFIX, rel);
14333 else if (HAVE_AS_GOTOFF_IN_DATA)
14334 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14336 else if (TARGET_MACHO)
14338 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14339 machopic_output_function_base_name (file);
14344 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14345 GOT_SYMBOL_NAME, LPREFIX, value);
14348 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14352 ix86_expand_clear (rtx dest)
14356 /* We play register width games, which are only valid after reload. */
14357 gcc_assert (reload_completed);
14359 /* Avoid HImode and its attendant prefix byte. */
14360 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14361 dest = gen_rtx_REG (SImode, REGNO (dest));
14362 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14364 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14365 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14367 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14368 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
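/* Illustration (not from the original source): the size win behind the
   xor idiom -- "xorl %eax, %eax" encodes as 2 bytes (31 c0), while
   "movl $0, %eax" takes 5 bytes (b8 00 00 00 00); the price is the
   flags clobber modeled by the CLOBBER added above.  */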
14374 /* X is an unchanging MEM. If it is a constant pool reference, return
14375 the constant pool rtx, else NULL. */
14378 maybe_get_pool_constant (rtx x)
14380 x = ix86_delegitimize_address (XEXP (x, 0));
14382 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14383 return get_pool_constant (x);
14389 ix86_expand_move (enum machine_mode mode, rtx operands[])
14392 enum tls_model model;
14397 if (GET_CODE (op1) == SYMBOL_REF)
14399 model = SYMBOL_REF_TLS_MODEL (op1);
14402 op1 = legitimize_tls_address (op1, model, true);
14403 op1 = force_operand (op1, op0);
14407 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14408 && SYMBOL_REF_DLLIMPORT_P (op1))
14409 op1 = legitimize_dllimport_symbol (op1, false);
14411 else if (GET_CODE (op1) == CONST
14412 && GET_CODE (XEXP (op1, 0)) == PLUS
14413 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14415 rtx addend = XEXP (XEXP (op1, 0), 1);
14416 rtx symbol = XEXP (XEXP (op1, 0), 0);
14419 model = SYMBOL_REF_TLS_MODEL (symbol);
14421 tmp = legitimize_tls_address (symbol, model, true);
14422 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14423 && SYMBOL_REF_DLLIMPORT_P (symbol))
14424 tmp = legitimize_dllimport_symbol (symbol, true);
14428 tmp = force_operand (tmp, NULL);
14429 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14430 op0, 1, OPTAB_DIRECT);
14436 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14438 if (TARGET_MACHO && !TARGET_64BIT)
14443 rtx temp = ((reload_in_progress
14444 || ((op0 && REG_P (op0))
14446 ? op0 : gen_reg_rtx (Pmode));
14447 op1 = machopic_indirect_data_reference (op1, temp);
14448 op1 = machopic_legitimize_pic_address (op1, mode,
14449 temp == op1 ? 0 : temp);
14451 else if (MACHOPIC_INDIRECT)
14452 op1 = machopic_indirect_data_reference (op1, 0);
14460 op1 = force_reg (Pmode, op1);
14461 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14463 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14464 op1 = legitimize_pic_address (op1, reg);
14473 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14474 || !push_operand (op0, mode))
14476 op1 = force_reg (mode, op1);
14478 if (push_operand (op0, mode)
14479 && ! general_no_elim_operand (op1, mode))
14480 op1 = copy_to_mode_reg (mode, op1);
14482 /* Force large constants in 64-bit compilation into a register
14483 to get them CSEed. */
14484 if (can_create_pseudo_p ()
14485 && (mode == DImode) && TARGET_64BIT
14486 && immediate_operand (op1, mode)
14487 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14488 && !register_operand (op0, mode)
14490 op1 = copy_to_mode_reg (mode, op1);
14492 if (can_create_pseudo_p ()
14493 && FLOAT_MODE_P (mode)
14494 && GET_CODE (op1) == CONST_DOUBLE)
14496 /* If we are loading a floating point constant to a register,
14497 force the value to memory now, since we'll get better code
14498 out of the back end. */
14500 op1 = validize_mem (force_const_mem (mode, op1));
14501 if (!register_operand (op0, mode))
14503 rtx temp = gen_reg_rtx (mode);
14504 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14505 emit_move_insn (op0, temp);
14511 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14515 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14517 rtx op0 = operands[0], op1 = operands[1];
14518 unsigned int align = GET_MODE_ALIGNMENT (mode);
14520 /* Force constants other than zero into memory. We do not know how
14521 the instructions used to build constants modify the upper 64 bits
14522 of the register; once we have that information, we may be able
14523 to handle some of them more efficiently. */
14524 if (can_create_pseudo_p ()
14525 && register_operand (op0, mode)
14526 && (CONSTANT_P (op1)
14527 || (GET_CODE (op1) == SUBREG
14528 && CONSTANT_P (SUBREG_REG (op1))))
14529 && !standard_sse_constant_p (op1))
14530 op1 = validize_mem (force_const_mem (mode, op1));
14532 /* We need to check memory alignment for SSE mode since an attribute
14533 can make operands unaligned. */
14534 if (can_create_pseudo_p ()
14535 && SSE_REG_MODE_P (mode)
14536 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14537 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14541 /* ix86_expand_vector_move_misalign() does not like constants ... */
14542 if (CONSTANT_P (op1)
14543 || (GET_CODE (op1) == SUBREG
14544 && CONSTANT_P (SUBREG_REG (op1))))
14545 op1 = validize_mem (force_const_mem (mode, op1));
14547 /* ... nor both arguments in memory. */
14548 if (!register_operand (op0, mode)
14549 && !register_operand (op1, mode))
14550 op1 = force_reg (mode, op1);
14552 tmp[0] = op0; tmp[1] = op1;
14553 ix86_expand_vector_move_misalign (mode, tmp);
14557 /* Make operand1 a register if it isn't already. */
14558 if (can_create_pseudo_p ()
14559 && !register_operand (op0, mode)
14560 && !register_operand (op1, mode))
14562 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14566 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14569 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14570 straight to ix86_expand_vector_move. */
14571 /* Code generation for scalar reg-reg moves of single and double precision data:
14572 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14576 if (x86_sse_partial_reg_dependency == true)
14581 Code generation for scalar loads of double precision data:
14582 if (x86_sse_split_regs == true)
14583 movlpd mem, reg (gas syntax)
14587 Code generation for unaligned packed loads of single precision data
14588 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14589 if (x86_sse_unaligned_move_optimal)
14592 if (x86_sse_partial_reg_dependency == true)
14604 Code generation for unaligned packed loads of double precision data
14605 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14606 if (x86_sse_unaligned_move_optimal)
14609 if (x86_sse_split_regs == true)
14622 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14631 switch (GET_MODE_CLASS (mode))
14633 case MODE_VECTOR_INT:
14635 switch (GET_MODE_SIZE (mode))
14638 /* If we're optimizing for size, movups is the smallest. */
14639 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14641 op0 = gen_lowpart (V4SFmode, op0);
14642 op1 = gen_lowpart (V4SFmode, op1);
14643 emit_insn (gen_avx_movups (op0, op1));
14646 op0 = gen_lowpart (V16QImode, op0);
14647 op1 = gen_lowpart (V16QImode, op1);
14648 emit_insn (gen_avx_movdqu (op0, op1));
14651 op0 = gen_lowpart (V32QImode, op0);
14652 op1 = gen_lowpart (V32QImode, op1);
14653 emit_insn (gen_avx_movdqu256 (op0, op1));
14656 gcc_unreachable ();
14659 case MODE_VECTOR_FLOAT:
14660 op0 = gen_lowpart (mode, op0);
14661 op1 = gen_lowpart (mode, op1);
14666 emit_insn (gen_avx_movups (op0, op1));
14669 emit_insn (gen_avx_movups256 (op0, op1));
14672 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14674 op0 = gen_lowpart (V4SFmode, op0);
14675 op1 = gen_lowpart (V4SFmode, op1);
14676 emit_insn (gen_avx_movups (op0, op1));
14679 emit_insn (gen_avx_movupd (op0, op1));
14682 emit_insn (gen_avx_movupd256 (op0, op1));
14685 gcc_unreachable ();
14690 gcc_unreachable ();
14698 /* If we're optimizing for size, movups is the smallest. */
14699 if (optimize_insn_for_size_p ()
14700 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14702 op0 = gen_lowpart (V4SFmode, op0);
14703 op1 = gen_lowpart (V4SFmode, op1);
14704 emit_insn (gen_sse_movups (op0, op1));
14708 /* ??? If we have typed data, then it would appear that using
14709 movdqu is the only way to get unaligned data loaded with integer type. */
14711 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14713 op0 = gen_lowpart (V16QImode, op0);
14714 op1 = gen_lowpart (V16QImode, op1);
14715 emit_insn (gen_sse2_movdqu (op0, op1));
14719 if (TARGET_SSE2 && mode == V2DFmode)
14723 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14725 op0 = gen_lowpart (V2DFmode, op0);
14726 op1 = gen_lowpart (V2DFmode, op1);
14727 emit_insn (gen_sse2_movupd (op0, op1));
14731 /* When SSE registers are split into halves, we can avoid
14732 writing to the top half twice. */
14733 if (TARGET_SSE_SPLIT_REGS)
14735 emit_clobber (op0);
14740 /* ??? Not sure about the best option for the Intel chips.
14741 The following would seem to satisfy; the register is
14742 entirely cleared, breaking the dependency chain. We
14743 then store to the upper half, with a dependency depth
14744 of one. A rumor has it that Intel recommends two movsd
14745 followed by an unpacklpd, but this is unconfirmed. And
14746 given that the dependency depth of the unpacklpd would
14747 still be one, I'm not sure why this would be better. */
14748 zero = CONST0_RTX (V2DFmode);
14751 m = adjust_address (op1, DFmode, 0);
14752 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14753 m = adjust_address (op1, DFmode, 8);
14754 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14758 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14760 op0 = gen_lowpart (V4SFmode, op0);
14761 op1 = gen_lowpart (V4SFmode, op1);
14762 emit_insn (gen_sse_movups (op0, op1));
14766 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14767 emit_move_insn (op0, CONST0_RTX (mode));
14769 emit_clobber (op0);
14771 if (mode != V4SFmode)
14772 op0 = gen_lowpart (V4SFmode, op0);
14773 m = adjust_address (op1, V2SFmode, 0);
14774 emit_insn (gen_sse_loadlps (op0, op0, m));
14775 m = adjust_address (op1, V2SFmode, 8);
14776 emit_insn (gen_sse_loadhps (op0, op0, m));
14779 else if (MEM_P (op0))
14781 /* If we're optimizing for size, movups is the smallest. */
14782 if (optimize_insn_for_size_p ()
14783 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14785 op0 = gen_lowpart (V4SFmode, op0);
14786 op1 = gen_lowpart (V4SFmode, op1);
14787 emit_insn (gen_sse_movups (op0, op1));
14791 /* ??? Similar to above, only less clear because of quote
14792 typeless stores unquote. */
14793 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14794 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14796 op0 = gen_lowpart (V16QImode, op0);
14797 op1 = gen_lowpart (V16QImode, op1);
14798 emit_insn (gen_sse2_movdqu (op0, op1));
14802 if (TARGET_SSE2 && mode == V2DFmode)
14804 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14806 op0 = gen_lowpart (V2DFmode, op0);
14807 op1 = gen_lowpart (V2DFmode, op1);
14808 emit_insn (gen_sse2_movupd (op0, op1));
14812 m = adjust_address (op0, DFmode, 0);
14813 emit_insn (gen_sse2_storelpd (m, op1));
14814 m = adjust_address (op0, DFmode, 8);
14815 emit_insn (gen_sse2_storehpd (m, op1));
14820 if (mode != V4SFmode)
14821 op1 = gen_lowpart (V4SFmode, op1);
14823 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14825 op0 = gen_lowpart (V4SFmode, op0);
14826 emit_insn (gen_sse_movups (op0, op1));
14830 m = adjust_address (op0, V2SFmode, 0);
14831 emit_insn (gen_sse_storelps (m, op1));
14832 m = adjust_address (op0, V2SFmode, 8);
14833 emit_insn (gen_sse_storehps (m, op1));
14838 gcc_unreachable ();
14841 /* Expand a push in MODE. This is some mode for which we do not support
14842 proper push instructions, at least from the registers that we expect
14843 the value to live in. */
14846 ix86_expand_push (enum machine_mode mode, rtx x)
14850 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14851 GEN_INT (-GET_MODE_SIZE (mode)),
14852 stack_pointer_rtx, 1, OPTAB_DIRECT);
14853 if (tmp != stack_pointer_rtx)
14854 emit_move_insn (stack_pointer_rtx, tmp);
14856 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14858 /* When we push an operand onto the stack, it has to be aligned at least
14859 at the function argument boundary. However since we don't have
14860 the argument type, we can't determine the actual argument boundary. */
14862 emit_move_insn (tmp, x);
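/* Illustration (not from the original source): the expansion above is
   morally

       sub  $size, %esp
       mov  value, (%esp)

   i.e. a push synthesized from a stack-pointer adjustment plus a plain
   store, for modes the hardware cannot push directly.  */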
14865 /* Helper function of ix86_fixup_binary_operands to canonicalize
14866 operand order. Returns true if the operands should be swapped. */
14869 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14872 rtx dst = operands[0];
14873 rtx src1 = operands[1];
14874 rtx src2 = operands[2];
14876 /* If the operation is not commutative, we can't do anything. */
14877 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14880 /* Highest priority is that src1 should match dst. */
14881 if (rtx_equal_p (dst, src1))
14883 if (rtx_equal_p (dst, src2))
14886 /* Next highest priority is that immediate constants come second. */
14887 if (immediate_operand (src2, mode))
14889 if (immediate_operand (src1, mode))
14892 /* Lowest priority is that memory references should come second. */
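/* Illustration (not from the original source): given "dst = src2 + dst"
   with a commutative PLUS, the swap above rewrites it as
   "dst = dst + src2", so the two-address form "addl %src2, %dst" can
   be used without an extra register move.  */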
14902 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14903 destination to use for the operation. If different from the true
14904 destination in operands[0], a copy operation will be required. */
14907 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14910 rtx dst = operands[0];
14911 rtx src1 = operands[1];
14912 rtx src2 = operands[2];
14914 /* Canonicalize operand order. */
14915 if (ix86_swap_binary_operands_p (code, mode, operands))
14919 /* It is invalid to swap operands of different modes. */
14920 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14927 /* Both source operands cannot be in memory. */
14928 if (MEM_P (src1) && MEM_P (src2))
14930 /* Optimization: Only read from memory once. */
14931 if (rtx_equal_p (src1, src2))
14933 src2 = force_reg (mode, src2);
14937 src2 = force_reg (mode, src2);
14940 /* If the destination is memory, and we do not have matching source
14941 operands, do things in registers. */
14942 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14943 dst = gen_reg_rtx (mode);
14945 /* Source 1 cannot be a constant. */
14946 if (CONSTANT_P (src1))
14947 src1 = force_reg (mode, src1);
14949 /* Source 1 cannot be a non-matching memory. */
14950 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14951 src1 = force_reg (mode, src1);
14953 operands[1] = src1;
14954 operands[2] = src2;
14958 /* Similarly, but assume that the destination has already been
14959 set up properly. */
14962 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14963 enum machine_mode mode, rtx operands[])
14965 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14966 gcc_assert (dst == operands[0]);
14969 /* Attempt to expand a binary operator. Make the expansion closer to the
14970 actual machine, than just general_operand, which would allow 3 separate
14971 memory references (one output, two input) in a single insn. */
14974 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14977 rtx src1, src2, dst, op, clob;
14979 dst = ix86_fixup_binary_operands (code, mode, operands);
14980 src1 = operands[1];
14981 src2 = operands[2];
14983 /* Emit the instruction. */
14985 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14986 if (reload_in_progress)
14988 /* Reload doesn't know about the flags register, and doesn't know that
14989 it doesn't want to clobber it. We can only do this with PLUS. */
14990 gcc_assert (code == PLUS);
14995 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14996 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14999 /* Fix up the destination if needed. */
15000 if (dst != operands[0])
15001 emit_move_insn (operands[0], dst);
15004 /* Return TRUE or FALSE depending on whether the binary operator meets the
15005 appropriate constraints. */
15008 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15011 rtx dst = operands[0];
15012 rtx src1 = operands[1];
15013 rtx src2 = operands[2];
15015 /* Both source operands cannot be in memory. */
15016 if (MEM_P (src1) && MEM_P (src2))
15019 /* Canonicalize operand order for commutative operators. */
15020 if (ix86_swap_binary_operands_p (code, mode, operands))
15027 /* If the destination is memory, we must have a matching source operand. */
15028 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15031 /* Source 1 cannot be a constant. */
15032 if (CONSTANT_P (src1))
15035 /* Source 1 cannot be a non-matching memory. */
15036 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15038 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15039 return (code == AND
15042 || (TARGET_64BIT && mode == DImode))
15043 && CONST_INT_P (src2)
15044 && (INTVAL (src2) == 0xff
15045 || INTVAL (src2) == 0xffff));
15051 /* Attempt to expand a unary operator. Make the expansion closer to the
15052 actual machine, than just general_operand, which would allow 2 separate
15053 memory references (one output, one input) in a single insn. */
15056 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15059 int matching_memory;
15060 rtx src, dst, op, clob;
15065 /* If the destination is memory, and we do not have matching source
15066 operands, do things in registers. */
15067 matching_memory = 0;
15070 if (rtx_equal_p (dst, src))
15071 matching_memory = 1;
15073 dst = gen_reg_rtx (mode);
15076 /* When source operand is memory, destination must match. */
15077 if (MEM_P (src) && !matching_memory)
15078 src = force_reg (mode, src);
15080 /* Emit the instruction. */
15082 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15083 if (reload_in_progress || code == NOT)
15085 /* Reload doesn't know about the flags register, and doesn't know that
15086 it doesn't want to clobber it. */
15087 gcc_assert (code == NOT);
15092 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15093 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15096 /* Fix up the destination if needed. */
15097 if (dst != operands[0])
15098 emit_move_insn (operands[0], dst);
15101 /* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if the dividend
15102 and divisor are within the range [0-255]. */
15105 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15108 rtx end_label, qimode_label;
15109 rtx insn, div, mod;
15110 rtx scratch, tmp0, tmp1, tmp2;
15111 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15112 rtx (*gen_zero_extend) (rtx, rtx);
15113 rtx (*gen_test_ccno_1) (rtx, rtx);
15118 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15119 gen_test_ccno_1 = gen_testsi_ccno_1;
15120 gen_zero_extend = gen_zero_extendqisi2;
15123 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15124 gen_test_ccno_1 = gen_testdi_ccno_1;
15125 gen_zero_extend = gen_zero_extendqidi2;
15128 gcc_unreachable ();
15131 end_label = gen_label_rtx ();
15132 qimode_label = gen_label_rtx ();
15134 scratch = gen_reg_rtx (mode);
15136 /* Use 8bit unsigned divmod if dividend and divisor are within
15137 the range [0, 255]. */
15138 emit_move_insn (scratch, operands[2]);
15139 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15140 scratch, 1, OPTAB_DIRECT);
15141 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15142 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15143 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15144 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15145 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15147 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15148 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15149 JUMP_LABEL (insn) = qimode_label;
15151 /* Generate the original signed/unsigned divmod. */
15152 div = gen_divmod4_1 (operands[0], operands[1],
15153 operands[2], operands[3]);
15156 /* Branch to the end. */
15157 emit_jump_insn (gen_jump (end_label));
15160 /* Generate 8bit unsigned divide. */
15161 emit_label (qimode_label);
15162 /* Don't use operands[0] for result of 8bit divide since not all
15163 registers support QImode ZERO_EXTRACT. */
15164 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15165 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15166 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15167 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15171 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15172 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15176 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15177 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15180 /* Extract remainder from AH. */
15181 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15182 if (REG_P (operands[1]))
15183 insn = emit_move_insn (operands[1], tmp1);
15186 /* Need a new scratch register since the old one has the result of the 8bit divide. */
15188 scratch = gen_reg_rtx (mode);
15189 emit_move_insn (scratch, tmp1);
15190 insn = emit_move_insn (operands[1], scratch);
15192 set_unique_reg_note (insn, REG_EQUAL, mod);
15194 /* Zero extend quotient from AL. */
15195 tmp1 = gen_lowpart (QImode, tmp0);
15196 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15197 set_unique_reg_note (insn, REG_EQUAL, div);
15199 emit_label (end_label);
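/* Added illustrative sketch: at the C level the split above computes,
   roughly,

     if (((dividend | divisor) & ~(unsigned) 0xff) == 0)
       use one 8-bit DIV (AL = quotient, AH = remainder);
     else
       use the full-width signed/unsigned divmod;

   with the dispatch branch predicted 50/50 (the predict_jump call above).  */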
15202 #define LEA_SEARCH_THRESHOLD 12
15204 /* Search backward for non-agu definition of register number REGNO1
15205 or register number REGNO2 in INSN's basic block until
15206 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15207 2. Reach BB boundary, or
15208 3. Reach agu definition.
15209 Returns the distance between the non-agu definition point and INSN.
15210 If no definition point, returns -1. */
15213 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15216 basic_block bb = BLOCK_FOR_INSN (insn);
15219 enum attr_type insn_type;
15221 if (insn != BB_HEAD (bb))
15223 rtx prev = PREV_INSN (insn);
15224 while (prev && distance < LEA_SEARCH_THRESHOLD)
15226 if (NONDEBUG_INSN_P (prev))
15229 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15230 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15231 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15232 && (regno1 == DF_REF_REGNO (*def_rec)
15233 || regno2 == DF_REF_REGNO (*def_rec)))
15235 insn_type = get_attr_type (prev);
15236 if (insn_type != TYPE_LEA)
15240 if (prev == BB_HEAD (bb))
15242 prev = PREV_INSN (prev);
15246 if (distance < LEA_SEARCH_THRESHOLD)
15250 bool simple_loop = false;
15252 FOR_EACH_EDGE (e, ei, bb->preds)
15255 simple_loop = true;
15261 rtx prev = BB_END (bb);
15264 && distance < LEA_SEARCH_THRESHOLD)
15266 if (NONDEBUG_INSN_P (prev))
15269 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15270 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15271 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15272 && (regno1 == DF_REF_REGNO (*def_rec)
15273 || regno2 == DF_REF_REGNO (*def_rec)))
15275 insn_type = get_attr_type (prev);
15276 if (insn_type != TYPE_LEA)
15280 prev = PREV_INSN (prev);
15288 /* get_attr_type may modify recog data. We want to make sure
15289 that recog data is valid for instruction INSN, on which
15290 distance_non_agu_define is called. INSN is unchanged here. */
15291 extract_insn_cached (insn);
15295 /* Return the distance between INSN and the next insn that uses
15296 register number REGNO0 in a memory address. Return -1 if no such
15297 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15300 distance_agu_use (unsigned int regno0, rtx insn)
15302 basic_block bb = BLOCK_FOR_INSN (insn);
15307 if (insn != BB_END (bb))
15309 rtx next = NEXT_INSN (insn);
15310 while (next && distance < LEA_SEARCH_THRESHOLD)
15312 if (NONDEBUG_INSN_P (next))
15316 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15317 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15318 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15319 && regno0 == DF_REF_REGNO (*use_rec))
15321 /* Return DISTANCE if OP0 is used in memory
15322 address in NEXT. */
15326 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15327 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15328 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15329 && regno0 == DF_REF_REGNO (*def_rec))
15331 /* Return -1 if OP0 is set in NEXT. */
15335 if (next == BB_END (bb))
15337 next = NEXT_INSN (next);
15341 if (distance < LEA_SEARCH_THRESHOLD)
15345 bool simple_loop = false;
15347 FOR_EACH_EDGE (e, ei, bb->succs)
15350 simple_loop = true;
15356 rtx next = BB_HEAD (bb);
15359 && distance < LEA_SEARCH_THRESHOLD)
15361 if (NONDEBUG_INSN_P (next))
15365 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15366 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15367 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15368 && regno0 == DF_REF_REGNO (*use_rec))
15370 /* Return DISTANCE if OP0 is used in memory
15371 address in NEXT. */
15375 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15376 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15377 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15378 && regno0 == DF_REF_REGNO (*def_rec))
15380 /* Return -1 if OP0 is set in NEXT. */
15385 next = NEXT_INSN (next);
15393 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15394 there is a dilemma of choosing LEA or ADD.
15395 Negative value: ADD is preferred over LEA
15397 Positive value: LEA is preferred over ADD */
15398 #define IX86_LEA_PRIORITY 2
15400 /* Return true if it is ok to optimize an ADD operation to LEA
15401 operation to avoid flag register consumption. For most processors,
15402 ADD is faster than LEA. For processors like ATOM, if the
15403 destination register of LEA holds an actual address which will be
15404 used soon, LEA is better and otherwise ADD is better. */
15407 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15409 unsigned int regno0 = true_regnum (operands[0]);
15410 unsigned int regno1 = true_regnum (operands[1]);
15411 unsigned int regno2 = true_regnum (operands[2]);
15413 /* If a = b + c (a != b && a != c), we must use the lea form. */
15414 if (regno0 != regno1 && regno0 != regno2)
15417 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15421 int dist_define, dist_use;
15423 /* Return false if REGNO0 isn't used in memory address. */
15424 dist_use = distance_agu_use (regno0, insn);
15428 dist_define = distance_non_agu_define (regno1, regno2, insn);
15429 if (dist_define <= 0)
15432 /* If this insn has both backward non-agu dependence and forward
15433 agu dependence, the one with the shorter distance takes effect. */
15434 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
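/* Added worked example (distances hypothetical): with IX86_LEA_PRIORITY
   == 2, if the add's inputs were produced by a non-LEA insn one insn back
   (dist_define == 1) and the result feeds an address five insns ahead
   (dist_use == 5), then 1 + 2 < 5 and the backward ALU dependence wins,
   so the plain ADD is kept; were the address use closer (say dist_use
   == 2), the test would fail and the LEA form would be chosen.  */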
15441 /* Return true if destination reg of SET_BODY is shift count of
15445 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15451 /* Retrieve destination of SET_BODY. */
15452 switch (GET_CODE (set_body))
15455 set_dest = SET_DEST (set_body);
15456 if (!set_dest || !REG_P (set_dest))
15460 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15461 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15469 /* Retrieve shift count of USE_BODY. */
15470 switch (GET_CODE (use_body))
15473 shift_rtx = XEXP (use_body, 1);
15476 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15477 if (ix86_dep_by_shift_count_body (set_body,
15478 XVECEXP (use_body, 0, i)))
15486 && (GET_CODE (shift_rtx) == ASHIFT
15487 || GET_CODE (shift_rtx) == LSHIFTRT
15488 || GET_CODE (shift_rtx) == ASHIFTRT
15489 || GET_CODE (shift_rtx) == ROTATE
15490 || GET_CODE (shift_rtx) == ROTATERT))
15492 rtx shift_count = XEXP (shift_rtx, 1);
15494 /* Return true if shift count is dest of SET_BODY. */
15495 if (REG_P (shift_count)
15496 && true_regnum (set_dest) == true_regnum (shift_count))
15503 /* Return true if destination reg of SET_INSN is shift count of
15507 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15509 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15510 PATTERN (use_insn));
15513 /* Return TRUE or FALSE depending on whether the unary operator meets the
15514 appropriate constraints. */
15517 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15518 enum machine_mode mode ATTRIBUTE_UNUSED,
15519 rtx operands[2] ATTRIBUTE_UNUSED)
15521 /* If one of operands is memory, source and destination must match. */
15522 if ((MEM_P (operands[0])
15523 || MEM_P (operands[1]))
15524 && ! rtx_equal_p (operands[0], operands[1]))
15529 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15530 are ok, keeping in mind the possible movddup alternative. */
15533 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15535 if (MEM_P (operands[0]))
15536 return rtx_equal_p (operands[0], operands[1 + high]);
15537 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15538 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15542 /* Post-reload splitter for converting an SF or DFmode value in an
15543 SSE register into an unsigned SImode. */
15546 ix86_split_convert_uns_si_sse (rtx operands[])
15548 enum machine_mode vecmode;
15549 rtx value, large, zero_or_two31, input, two31, x;
15551 large = operands[1];
15552 zero_or_two31 = operands[2];
15553 input = operands[3];
15554 two31 = operands[4];
15555 vecmode = GET_MODE (large);
15556 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15558 /* Load up the value into the low element. We must ensure that the other
15559 elements are valid floats -- zero is the easiest such value. */
15562 if (vecmode == V4SFmode)
15563 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15565 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15569 input = gen_rtx_REG (vecmode, REGNO (input));
15570 emit_move_insn (value, CONST0_RTX (vecmode));
15571 if (vecmode == V4SFmode)
15572 emit_insn (gen_sse_movss (value, value, input));
15574 emit_insn (gen_sse2_movsd (value, value, input));
15577 emit_move_insn (large, two31);
15578 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15580 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15581 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15583 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15584 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15586 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15587 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15589 large = gen_rtx_REG (V4SImode, REGNO (large));
15590 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15592 x = gen_rtx_REG (V4SImode, REGNO (value));
15593 if (vecmode == V4SFmode)
15594 emit_insn (gen_sse2_cvttps2dq (x, value));
15596 emit_insn (gen_sse2_cvttpd2dq (x, value));
15599 emit_insn (gen_xorv4si3 (value, value, large));
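/* Added scalar sketch of the algorithm above (illustrative only):

     unsigned int r;
     if (x >= 0x1.0p31)
       r = (unsigned int) (int) (x - 0x1.0p31) ^ 0x80000000u;
     else
       r = (unsigned int) (int) x;

   The vector code forms the "large" mask with a compare, subtracts
   either 0 or 2^31, truncates with cvttp[sd]2dq, and xors the sign
   bit back in via the mask shifted left by 31.  */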
15602 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15603 Expects the 64-bit DImode to be supplied in a pair of integral
15604 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15605 -mfpmath=sse, !optimize_size only. */
15608 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15610 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15611 rtx int_xmm, fp_xmm;
15612 rtx biases, exponents;
15615 int_xmm = gen_reg_rtx (V4SImode);
15616 if (TARGET_INTER_UNIT_MOVES)
15617 emit_insn (gen_movdi_to_sse (int_xmm, input));
15618 else if (TARGET_SSE_SPLIT_REGS)
15620 emit_clobber (int_xmm);
15621 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15625 x = gen_reg_rtx (V2DImode);
15626 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15627 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15630 x = gen_rtx_CONST_VECTOR (V4SImode,
15631 gen_rtvec (4, GEN_INT (0x43300000UL),
15632 GEN_INT (0x45300000UL),
15633 const0_rtx, const0_rtx));
15634 exponents = validize_mem (force_const_mem (V4SImode, x));
15636 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15637 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15639 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15640 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15641 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15642 (0x1.0p84 + double(fp_value_hi_xmm)).
15643 Note these exponents differ by 32. */
15645 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15647 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15648 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15649 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15650 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15651 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15652 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15653 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15654 biases = validize_mem (force_const_mem (V2DFmode, biases));
15655 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15657 /* Add the upper and lower DFmode values together. */
15659 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15662 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15663 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15664 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15667 ix86_expand_vector_extract (false, target, fp_xmm, 0);
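/* Added runnable sketch of the bias trick above (illustrative only;
   the helper name is made up):

     #include <stdint.h>
     double u64_to_double (uint64_t x)
     {
       union { uint64_t i; double d; } lo, hi;
       lo.i = ((uint64_t) 0x43300000 << 32) | (uint32_t) x;  // 2^52 + lo
       hi.i = ((uint64_t) 0x45300000 << 32) | (x >> 32);     // 2^84 + hi*2^32
       return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
     }

   Both bias subtractions are exact, so only the final addition rounds.  */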
15670 /* Not used, but eases macroization of patterns. */
15672 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15673 rtx input ATTRIBUTE_UNUSED)
15675 gcc_unreachable ();
15678 /* Convert an unsigned SImode value into a DFmode. Only currently used
15679 for SSE, but applicable anywhere. */
15682 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15684 REAL_VALUE_TYPE TWO31r;
15687 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15688 NULL, 1, OPTAB_DIRECT);
15690 fp = gen_reg_rtx (DFmode);
15691 emit_insn (gen_floatsidf2 (fp, x));
15693 real_ldexp (&TWO31r, &dconst1, 31);
15694 x = const_double_from_real_value (TWO31r, DFmode);
15696 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15698 emit_move_insn (target, x);
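/* Added note: the identity used above is

     (double) u == (double) (int) (u - 0x80000000u) + 0x1.0p31

   for any 32-bit unsigned u: bias the value into signed range, use the
   ordinary signed int->double conversion, and add 2^31 back (exactly,
   since both steps are exact in DFmode).  */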
15701 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15702 32-bit mode; otherwise we have a direct convert instruction. */
15705 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15707 REAL_VALUE_TYPE TWO32r;
15708 rtx fp_lo, fp_hi, x;
15710 fp_lo = gen_reg_rtx (DFmode);
15711 fp_hi = gen_reg_rtx (DFmode);
15713 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15715 real_ldexp (&TWO32r, &dconst1, 32);
15716 x = const_double_from_real_value (TWO32r, DFmode);
15717 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15719 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15721 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15724 emit_move_insn (target, x);
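/* Added note: with HI the signed high word and LO the unsigned low word
   of the input, the code above computes

     (double) x == (double) HI * 0x1.0p32 + (double) LO

   where the conversions and the multiply are exact and only the final
   addition rounds.  */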
15727 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15728 For x86_32, -mfpmath=sse, !optimize_size only. */
15730 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15732 REAL_VALUE_TYPE ONE16r;
15733 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15735 real_ldexp (&ONE16r, &dconst1, 16);
15736 x = const_double_from_real_value (ONE16r, SFmode);
15737 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15738 NULL, 0, OPTAB_DIRECT);
15739 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15740 NULL, 0, OPTAB_DIRECT);
15741 fp_hi = gen_reg_rtx (SFmode);
15742 fp_lo = gen_reg_rtx (SFmode);
15743 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15744 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15745 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15747 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15749 if (!rtx_equal_p (target, fp_hi))
15750 emit_move_insn (target, fp_hi);
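/* Added note: the code above computes

     (float) (u >> 16) * 0x1.0p16f + (float) (u & 0xffff)

   which equals (float) u: each 16-bit half converts to SFmode exactly,
   the multiply is exact, and the single rounding of the final addition
   is exactly the rounding the direct conversion would perform.  */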
15753 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15754 then replicate the value for all elements of the vector
15758 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15765 v = gen_rtvec (4, value, value, value, value);
15766 return gen_rtx_CONST_VECTOR (V4SImode, v);
15770 v = gen_rtvec (2, value, value);
15771 return gen_rtx_CONST_VECTOR (V2DImode, v);
15775 v = gen_rtvec (8, value, value, value, value,
15776 value, value, value, value);
15778 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
15779 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
15780 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
15781 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15782 return gen_rtx_CONST_VECTOR (V8SFmode, v);
15786 v = gen_rtvec (4, value, value, value, value);
15788 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15789 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15790 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15794 v = gen_rtvec (4, value, value, value, value);
15796 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
15797 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
15798 return gen_rtx_CONST_VECTOR (V4DFmode, v);
15802 v = gen_rtvec (2, value, value);
15804 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15805 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15808 gcc_unreachable ();
15812 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15813 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15814 for an SSE register. If VECT is true, then replicate the mask for
15815 all elements of the vector register. If INVERT is true, then create
15816 a mask excluding the sign bit. */
15819 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15821 enum machine_mode vec_mode, imode;
15822 HOST_WIDE_INT hi, lo;
15827 /* Find the sign bit, sign extended to 2*HWI. */
15834 mode = GET_MODE_INNER (mode);
15836 lo = 0x80000000, hi = lo < 0;
15843 mode = GET_MODE_INNER (mode);
15845 if (HOST_BITS_PER_WIDE_INT >= 64)
15846 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15848 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15853 vec_mode = VOIDmode;
15854 if (HOST_BITS_PER_WIDE_INT >= 64)
15857 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15864 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15868 lo = ~lo, hi = ~hi;
15874 mask = immed_double_const (lo, hi, imode);
15876 vec = gen_rtvec (2, v, mask);
15877 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15878 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15885 gcc_unreachable ();
15889 lo = ~lo, hi = ~hi;
15891 /* Force this value into the low part of a fp vector constant. */
15892 mask = immed_double_const (lo, hi, imode);
15893 mask = gen_lowpart (mode, mask);
15895 if (vec_mode == VOIDmode)
15896 return force_reg (mode, mask);
15898 v = ix86_build_const_vector (vec_mode, vect, mask);
15899 return force_reg (vec_mode, v);
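/* Added example: for SFmode the mask built here is 0x80000000 per
   element (sign bit only), or its complement 0x7fffffff when INVERT --
   exactly the constants xorps/andps need for negation and fabs; for
   DFmode the analogous 0x8000000000000000 / 0x7fffffffffffffff.  */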
15902 /* Generate code for floating point ABS or NEG. */
15905 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15908 rtx mask, set, use, clob, dst, src;
15909 bool use_sse = false;
15910 bool vector_mode = VECTOR_MODE_P (mode);
15911 enum machine_mode vmode = mode;
15915 else if (mode == TFmode)
15917 else if (TARGET_SSE_MATH)
15919 use_sse = SSE_FLOAT_MODE_P (mode);
15920 if (mode == SFmode)
15922 else if (mode == DFmode)
15926 /* NEG and ABS performed with SSE use bitwise mask operations.
15927 Create the appropriate mask now. */
15929 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
15938 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15939 set = gen_rtx_SET (VOIDmode, dst, set);
15944 set = gen_rtx_fmt_e (code, mode, src);
15945 set = gen_rtx_SET (VOIDmode, dst, set);
15948 use = gen_rtx_USE (VOIDmode, mask);
15949 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15950 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15951 gen_rtvec (3, set, use, clob)));
15958 /* Expand a copysign operation. Special case operand 0 being a constant. */
15961 ix86_expand_copysign (rtx operands[])
15963 enum machine_mode mode, vmode;
15964 rtx dest, op0, op1, mask, nmask;
15966 dest = operands[0];
15970 mode = GET_MODE (dest);
15972 if (mode == SFmode)
15974 else if (mode == DFmode)
15979 if (GET_CODE (op0) == CONST_DOUBLE)
15981 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15983 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15984 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15986 if (mode == SFmode || mode == DFmode)
15988 if (op0 == CONST0_RTX (mode))
15989 op0 = CONST0_RTX (vmode);
15992 rtx v = ix86_build_const_vector (vmode, false, op0);
15994 op0 = force_reg (vmode, v);
15997 else if (op0 != CONST0_RTX (mode))
15998 op0 = force_reg (mode, op0);
16000 mask = ix86_build_signbit_mask (vmode, 0, 0);
16002 if (mode == SFmode)
16003 copysign_insn = gen_copysignsf3_const;
16004 else if (mode == DFmode)
16005 copysign_insn = gen_copysigndf3_const;
16007 copysign_insn = gen_copysigntf3_const;
16009 emit_insn (copysign_insn (dest, op0, op1, mask));
16013 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16015 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16016 mask = ix86_build_signbit_mask (vmode, 0, 0);
16018 if (mode == SFmode)
16019 copysign_insn = gen_copysignsf3_var;
16020 else if (mode == DFmode)
16021 copysign_insn = gen_copysigndf3_var;
16023 copysign_insn = gen_copysigntf3_var;
16025 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
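/* Added runnable sketch of the mask expansion (illustrative only; the
   helper name is made up):

     #include <stdint.h>
     float copysignf_sketch (float x, float y)
     {
       union { float f; uint32_t i; } ux = { x }, uy = { y };
       ux.i = (ux.i & 0x7fffffffu) | (uy.i & 0x80000000u);
       return ux.f;
     }

   The constant case above folds the "& 0x7fffffff" half at compile
   time; the variable case needs both masks, as split out below.  */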
16029 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16030 be a constant, and so has already been expanded into a vector constant. */
16033 ix86_split_copysign_const (rtx operands[])
16035 enum machine_mode mode, vmode;
16036 rtx dest, op0, mask, x;
16038 dest = operands[0];
16040 mask = operands[3];
16042 mode = GET_MODE (dest);
16043 vmode = GET_MODE (mask);
16045 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16046 x = gen_rtx_AND (vmode, dest, mask);
16047 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16049 if (op0 != CONST0_RTX (vmode))
16051 x = gen_rtx_IOR (vmode, dest, op0);
16052 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16056 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16057 so we have to do two masks. */
16060 ix86_split_copysign_var (rtx operands[])
16062 enum machine_mode mode, vmode;
16063 rtx dest, scratch, op0, op1, mask, nmask, x;
16065 dest = operands[0];
16066 scratch = operands[1];
16069 nmask = operands[4];
16070 mask = operands[5];
16072 mode = GET_MODE (dest);
16073 vmode = GET_MODE (mask);
16075 if (rtx_equal_p (op0, op1))
16077 /* Shouldn't happen often (it's useless, obviously), but when it does
16078 we'd generate incorrect code if we continue below. */
16079 emit_move_insn (dest, op0);
16083 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16085 gcc_assert (REGNO (op1) == REGNO (scratch));
16087 x = gen_rtx_AND (vmode, scratch, mask);
16088 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16091 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16092 x = gen_rtx_NOT (vmode, dest);
16093 x = gen_rtx_AND (vmode, x, op0);
16094 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16098 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16100 x = gen_rtx_AND (vmode, scratch, mask);
16102 else /* alternative 2,4 */
16104 gcc_assert (REGNO (mask) == REGNO (scratch));
16105 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16106 x = gen_rtx_AND (vmode, scratch, op1);
16108 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16110 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16112 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16113 x = gen_rtx_AND (vmode, dest, nmask);
16115 else /* alternative 3,4 */
16117 gcc_assert (REGNO (nmask) == REGNO (dest));
16119 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16120 x = gen_rtx_AND (vmode, dest, op0);
16122 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16125 x = gen_rtx_IOR (vmode, dest, scratch);
16126 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16129 /* Return TRUE or FALSE depending on whether the first SET in INSN
16130 has source and destination with matching CC modes, and that the
16131 CC mode is at least as constrained as REQ_MODE. */
16134 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16137 enum machine_mode set_mode;
16139 set = PATTERN (insn);
16140 if (GET_CODE (set) == PARALLEL)
16141 set = XVECEXP (set, 0, 0);
16142 gcc_assert (GET_CODE (set) == SET);
16143 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16145 set_mode = GET_MODE (SET_DEST (set));
16149 if (req_mode != CCNOmode
16150 && (req_mode != CCmode
16151 || XEXP (SET_SRC (set), 1) != const0_rtx))
16155 if (req_mode == CCGCmode)
16159 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16163 if (req_mode == CCZmode)
16174 gcc_unreachable ();
16177 return GET_MODE (SET_SRC (set)) == set_mode;
16180 /* Generate insn patterns to do an integer compare of OPERANDS. */
16183 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16185 enum machine_mode cmpmode;
16188 cmpmode = SELECT_CC_MODE (code, op0, op1);
16189 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16191 /* This is very simple, but making the interface the same as in the
16192 FP case makes the rest of the code easier. */
16193 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16194 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16196 /* Return the test that should be put into the flags user, i.e.
16197 the bcc, scc, or cmov instruction. */
16198 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16201 /* Figure out whether to use ordered or unordered fp comparisons.
16202 Return the appropriate mode to use. */
16205 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16207 /* ??? In order to make all comparisons reversible, we do all comparisons
16208 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16209 all forms of trapping and nontrapping comparisons, we can make inequality
16210 comparisons trapping again, since it results in better code when using
16211 FCOM based compares. */
16212 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16216 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16218 enum machine_mode mode = GET_MODE (op0);
16220 if (SCALAR_FLOAT_MODE_P (mode))
16222 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16223 return ix86_fp_compare_mode (code);
16228 /* Only zero flag is needed. */
16229 case EQ: /* ZF=0 */
16230 case NE: /* ZF!=0 */
16232 /* Codes needing carry flag. */
16233 case GEU: /* CF=0 */
16234 case LTU: /* CF=1 */
16235 /* Detect overflow checks. They need just the carry flag. */
16236 if (GET_CODE (op0) == PLUS
16237 && rtx_equal_p (op1, XEXP (op0, 0)))
16241 case GTU: /* CF=0 & ZF=0 */
16242 case LEU: /* CF=1 | ZF=1 */
16243 /* Detect overflow checks. They need just the carry flag. */
16244 if (GET_CODE (op0) == MINUS
16245 && rtx_equal_p (op1, XEXP (op0, 0)))
16249 /* Codes possibly doable only with sign flag when
16250 comparing against zero. */
16251 case GE: /* SF=OF or SF=0 */
16252 case LT: /* SF<>OF or SF=1 */
16253 if (op1 == const0_rtx)
16256 /* For other cases Carry flag is not required. */
16258 /* Codes doable only with sign flag when comparing
16259 against zero, but we miss jump instruction for it
16260 so we need to use relational tests against overflow
16261 that thus needs to be zero. */
16262 case GT: /* ZF=0 & SF=OF */
16263 case LE: /* ZF=1 | SF<>OF */
16264 if (op1 == const0_rtx)
16268 /* The strcmp pattern does (use flags), and combine may ask us for the proper mode. */
16273 gcc_unreachable ();
16277 /* Return the fixed registers used for condition codes. */
16280 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16287 /* If two condition code modes are compatible, return a condition code
16288 mode which is compatible with both. Otherwise, return
16291 static enum machine_mode
16292 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16297 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16300 if ((m1 == CCGCmode && m2 == CCGOCmode)
16301 || (m1 == CCGOCmode && m2 == CCGCmode))
16307 gcc_unreachable ();
16337 /* These are only compatible with themselves, which we already
16344 /* Return a comparison we can do that is equivalent to
16345 swap_condition (code), except possibly for orderedness.
16346 But, never change orderedness if TARGET_IEEE_FP, returning
16347 UNKNOWN in that case if necessary. */
16349 static enum rtx_code
16350 ix86_fp_swap_condition (enum rtx_code code)
16354 case GT: /* GTU - CF=0 & ZF=0 */
16355 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16356 case GE: /* GEU - CF=0 */
16357 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16358 case UNLT: /* LTU - CF=1 */
16359 return TARGET_IEEE_FP ? UNKNOWN : GT;
16360 case UNLE: /* LEU - CF=1 | ZF=1 */
16361 return TARGET_IEEE_FP ? UNKNOWN : GE;
16363 return swap_condition (code);
16367 /* Return cost of comparison CODE using the best strategy for performance.
16368 All following functions use the number of instructions as the cost metric.
16369 In the future this should be tweaked to compute bytes for optimize_size and
16370 take into account the performance of various instructions on various CPUs. */
16373 ix86_fp_comparison_cost (enum rtx_code code)
16377 /* The cost of code using bit-twiddling on %ah. */
16394 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16398 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16401 gcc_unreachable ();
16404 switch (ix86_fp_comparison_strategy (code))
16406 case IX86_FPCMP_COMI:
16407 return arith_cost > 4 ? 3 : 2;
16408 case IX86_FPCMP_SAHF:
16409 return arith_cost > 4 ? 4 : 3;
16415 /* Return strategy to use for floating-point. We assume that fcomi is always
16416 preferable where available, since that is also true when looking at size
16417 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16419 enum ix86_fpcmp_strategy
16420 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16422 /* Do fcomi/sahf based test when profitable. */
16425 return IX86_FPCMP_COMI;
16427 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16428 return IX86_FPCMP_SAHF;
16430 return IX86_FPCMP_ARITH;
16433 /* Swap, force into registers, or otherwise massage the two operands
16434 to a fp comparison. The operands are updated in place; the new
16435 comparison code is returned. */
16437 static enum rtx_code
16438 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16440 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16441 rtx op0 = *pop0, op1 = *pop1;
16442 enum machine_mode op_mode = GET_MODE (op0);
16443 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16445 /* All of the unordered compare instructions only work on registers.
16446 The same is true of the fcomi compare instructions. The XFmode
16447 compare instructions require registers except when comparing
16448 against zero or when converting operand 1 from fixed point to
16452 && (fpcmp_mode == CCFPUmode
16453 || (op_mode == XFmode
16454 && ! (standard_80387_constant_p (op0) == 1
16455 || standard_80387_constant_p (op1) == 1)
16456 && GET_CODE (op1) != FLOAT)
16457 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16459 op0 = force_reg (op_mode, op0);
16460 op1 = force_reg (op_mode, op1);
16464 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16465 things around if they appear profitable, otherwise force op0
16466 into a register. */
16468 if (standard_80387_constant_p (op0) == 0
16470 && ! (standard_80387_constant_p (op1) == 0
16473 enum rtx_code new_code = ix86_fp_swap_condition (code);
16474 if (new_code != UNKNOWN)
16477 tmp = op0, op0 = op1, op1 = tmp;
16483 op0 = force_reg (op_mode, op0);
16485 if (CONSTANT_P (op1))
16487 int tmp = standard_80387_constant_p (op1);
16489 op1 = validize_mem (force_const_mem (op_mode, op1));
16493 op1 = force_reg (op_mode, op1);
16496 op1 = force_reg (op_mode, op1);
16500 /* Try to rearrange the comparison to make it cheaper. */
16501 if (ix86_fp_comparison_cost (code)
16502 > ix86_fp_comparison_cost (swap_condition (code))
16503 && (REG_P (op1) || can_create_pseudo_p ()))
16506 tmp = op0, op0 = op1, op1 = tmp;
16507 code = swap_condition (code);
16509 op0 = force_reg (op_mode, op0);
16517 /* Convert comparison codes we use to represent FP comparison to integer
16518 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
16522 ix86_fp_compare_code_to_integer (enum rtx_code code)
16551 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16554 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16556 enum machine_mode fpcmp_mode, intcmp_mode;
16559 fpcmp_mode = ix86_fp_compare_mode (code);
16560 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16562 /* Do fcomi/sahf based test when profitable. */
16563 switch (ix86_fp_comparison_strategy (code))
16565 case IX86_FPCMP_COMI:
16566 intcmp_mode = fpcmp_mode;
16567 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16568 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16573 case IX86_FPCMP_SAHF:
16574 intcmp_mode = fpcmp_mode;
16575 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16576 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16580 scratch = gen_reg_rtx (HImode);
16581 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16582 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16585 case IX86_FPCMP_ARITH:
16586 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16587 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16588 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16590 scratch = gen_reg_rtx (HImode);
16591 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16593 /* In the unordered case, we have to check C2 for NaN's, which
16594 doesn't happen to work out to anything nice combination-wise.
16595 So do some bit twiddling on the value we've got in AH to come
16596 up with an appropriate set of condition codes. */
16598 intcmp_mode = CCNOmode;
16603 if (code == GT || !TARGET_IEEE_FP)
16605 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16610 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16611 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16612 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16613 intcmp_mode = CCmode;
16619 if (code == LT && TARGET_IEEE_FP)
16621 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16622 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16623 intcmp_mode = CCmode;
16628 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16634 if (code == GE || !TARGET_IEEE_FP)
16636 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16641 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16642 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16648 if (code == LE && TARGET_IEEE_FP)
16650 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16651 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16652 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16653 intcmp_mode = CCmode;
16658 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16664 if (code == EQ && TARGET_IEEE_FP)
16666 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16667 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16668 intcmp_mode = CCmode;
16673 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16679 if (code == NE && TARGET_IEEE_FP)
16681 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16682 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16688 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16694 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16698 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16703 gcc_unreachable ();
16711 /* Return the test that should be put into the flags user, i.e.
16712 the bcc, scc, or cmov instruction. */
16713 return gen_rtx_fmt_ee (code, VOIDmode,
16714 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16719 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16723 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16724 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16726 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16728 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16729 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16732 ret = ix86_expand_int_compare (code, op0, op1);
16738 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16740 enum machine_mode mode = GET_MODE (op0);
16752 tmp = ix86_expand_compare (code, op0, op1);
16753 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16754 gen_rtx_LABEL_REF (VOIDmode, label),
16756 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16763 /* Expand DImode branch into multiple compare+branch. */
16765 rtx lo[2], hi[2], label2;
16766 enum rtx_code code1, code2, code3;
16767 enum machine_mode submode;
16769 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16771 tmp = op0, op0 = op1, op1 = tmp;
16772 code = swap_condition (code);
16775 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16776 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16778 submode = mode == DImode ? SImode : DImode;
16780 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16781 avoid two branches. This costs one extra insn, so disable when
16782 optimizing for size. */
16784 if ((code == EQ || code == NE)
16785 && (!optimize_insn_for_size_p ()
16786 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16791 if (hi[1] != const0_rtx)
16792 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16793 NULL_RTX, 0, OPTAB_WIDEN);
16796 if (lo[1] != const0_rtx)
16797 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16798 NULL_RTX, 0, OPTAB_WIDEN);
16800 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16801 NULL_RTX, 0, OPTAB_WIDEN);
16803 ix86_expand_branch (code, tmp, const0_rtx, label);
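/* Added example: a doubleword "a == b" thus becomes

     ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0

   -- one compare-and-branch against zero instead of two conditional
   branches.  */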
16807 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16808 op1 is a constant and the low word is zero, then we can just
16809 examine the high word. Similarly for low word -1 and
16810 less-or-equal-than or greater-than. */
16812 if (CONST_INT_P (hi[1]))
16815 case LT: case LTU: case GE: case GEU:
16816 if (lo[1] == const0_rtx)
16818 ix86_expand_branch (code, hi[0], hi[1], label);
16822 case LE: case LEU: case GT: case GTU:
16823 if (lo[1] == constm1_rtx)
16825 ix86_expand_branch (code, hi[0], hi[1], label);
16833 /* Otherwise, we need two or three jumps. */
16835 label2 = gen_label_rtx ();
16838 code2 = swap_condition (code);
16839 code3 = unsigned_condition (code);
16843 case LT: case GT: case LTU: case GTU:
16846 case LE: code1 = LT; code2 = GT; break;
16847 case GE: code1 = GT; code2 = LT; break;
16848 case LEU: code1 = LTU; code2 = GTU; break;
16849 case GEU: code1 = GTU; code2 = LTU; break;
16851 case EQ: code1 = UNKNOWN; code2 = NE; break;
16852 case NE: code2 = UNKNOWN; break;
16855 gcc_unreachable ();
16860 * if (hi(a) < hi(b)) goto true;
16861 * if (hi(a) > hi(b)) goto false;
16862 * if (lo(a) < lo(b)) goto true;
16866 if (code1 != UNKNOWN)
16867 ix86_expand_branch (code1, hi[0], hi[1], label);
16868 if (code2 != UNKNOWN)
16869 ix86_expand_branch (code2, hi[0], hi[1], label2);
16871 ix86_expand_branch (code3, lo[0], lo[1], label);
16873 if (code2 != UNKNOWN)
16874 emit_label (label2);
16879 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16884 /* Split branch based on floating point condition. */
16886 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16887 rtx target1, rtx target2, rtx tmp, rtx pushed)
16892 if (target2 != pc_rtx)
16895 code = reverse_condition_maybe_unordered (code);
16900 condition = ix86_expand_fp_compare (code, op1, op2,
16903 /* Remove pushed operand from stack. */
16905 ix86_free_from_memory (GET_MODE (pushed));
16907 i = emit_jump_insn (gen_rtx_SET
16909 gen_rtx_IF_THEN_ELSE (VOIDmode,
16910 condition, target1, target2)));
16911 if (split_branch_probability >= 0)
16912 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16916 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16920 gcc_assert (GET_MODE (dest) == QImode);
16922 ret = ix86_expand_compare (code, op0, op1);
16923 PUT_MODE (ret, QImode);
16924 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16927 /* Expand comparison setting or clearing carry flag. Return true when
16928 successful and set pop for the operation. */
16930 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16932 enum machine_mode mode =
16933 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16935 /* Do not handle double-mode compares that go through the special path. */
16936 if (mode == (TARGET_64BIT ? TImode : DImode))
16939 if (SCALAR_FLOAT_MODE_P (mode))
16941 rtx compare_op, compare_seq;
16943 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16945 /* Shortcut: the following common codes never translate
16946 into carry flag compares. */
16947 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16948 || code == ORDERED || code == UNORDERED)
16951 /* These comparisons require the zero flag; swap operands so they won't. */
16952 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16953 && !TARGET_IEEE_FP)
16958 code = swap_condition (code);
16961 /* Try to expand the comparison and verify that we end up with
16962 a carry-flag-based comparison. This fails to be true only when
16963 we decide to expand the comparison using arithmetic, which is not
16964 a common scenario. */
16966 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16967 compare_seq = get_insns ();
16970 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16971 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16972 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16974 code = GET_CODE (compare_op);
16976 if (code != LTU && code != GEU)
16979 emit_insn (compare_seq);
16984 if (!INTEGRAL_MODE_P (mode))
16993 /* Convert a==0 into (unsigned)a<1. */
16996 if (op1 != const0_rtx)
16999 code = (code == EQ ? LTU : GEU);
17002 /* Convert a>b into b<a or a>=b-1. */
17005 if (CONST_INT_P (op1))
17007 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17008 /* Bail out on overflow. We can still swap operands but that
17009 would force loading the constant into a register. */
17010 if (op1 == const0_rtx
17011 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17013 code = (code == GTU ? GEU : LTU);
17020 code = (code == GTU ? LTU : GEU);
17024 /* Convert a>=0 into (unsigned)a<0x80000000. */
17027 if (mode == DImode || op1 != const0_rtx)
17029 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17030 code = (code == LT ? GEU : LTU);
17034 if (mode == DImode || op1 != constm1_rtx)
17036 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17037 code = (code == LE ? GEU : LTU);
17043 /* Swapping operands may cause the constant to appear as the first operand. */
17044 if (!nonimmediate_operand (op0, VOIDmode))
17046 if (!can_create_pseudo_p ())
17048 op0 = force_reg (mode, op0);
17050 *pop = ix86_expand_compare (code, op0, op1);
17051 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17056 ix86_expand_int_movcc (rtx operands[])
17058 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17059 rtx compare_seq, compare_op;
17060 enum machine_mode mode = GET_MODE (operands[0]);
17061 bool sign_bit_compare_p = false;
17062 rtx op0 = XEXP (operands[1], 0);
17063 rtx op1 = XEXP (operands[1], 1);
17066 compare_op = ix86_expand_compare (code, op0, op1);
17067 compare_seq = get_insns ();
17070 compare_code = GET_CODE (compare_op);
17072 if ((op1 == const0_rtx && (code == GE || code == LT))
17073 || (op1 == constm1_rtx && (code == GT || code == LE)))
17074 sign_bit_compare_p = true;
17076 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17077 HImode insns, we'd be swallowed in word prefix ops. */
17079 if ((mode != HImode || TARGET_FAST_PREFIX)
17080 && (mode != (TARGET_64BIT ? TImode : DImode))
17081 && CONST_INT_P (operands[2])
17082 && CONST_INT_P (operands[3]))
17084 rtx out = operands[0];
17085 HOST_WIDE_INT ct = INTVAL (operands[2]);
17086 HOST_WIDE_INT cf = INTVAL (operands[3]);
17087 HOST_WIDE_INT diff;
17090 /* Sign bit compares are better done using shifts than by using sbb. */
17092 if (sign_bit_compare_p
17093 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17095 /* Detect overlap between destination and compare sources. */
17098 if (!sign_bit_compare_p)
17101 bool fpcmp = false;
17103 compare_code = GET_CODE (compare_op);
17105 flags = XEXP (compare_op, 0);
17107 if (GET_MODE (flags) == CCFPmode
17108 || GET_MODE (flags) == CCFPUmode)
17112 = ix86_fp_compare_code_to_integer (compare_code);
17115 /* To simplify the rest of the code, restrict to the GEU case. */
17116 if (compare_code == LTU)
17118 HOST_WIDE_INT tmp = ct;
17121 compare_code = reverse_condition (compare_code);
17122 code = reverse_condition (code);
17127 PUT_CODE (compare_op,
17128 reverse_condition_maybe_unordered
17129 (GET_CODE (compare_op)));
17131 PUT_CODE (compare_op,
17132 reverse_condition (GET_CODE (compare_op)));
17136 if (reg_overlap_mentioned_p (out, op0)
17137 || reg_overlap_mentioned_p (out, op1))
17138 tmp = gen_reg_rtx (mode);
17140 if (mode == DImode)
17141 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17143 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17144 flags, compare_op));
17148 if (code == GT || code == GE)
17149 code = reverse_condition (code);
17152 HOST_WIDE_INT tmp = ct;
17157 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17170 tmp = expand_simple_binop (mode, PLUS,
17172 copy_rtx (tmp), 1, OPTAB_DIRECT);
17183 tmp = expand_simple_binop (mode, IOR,
17185 copy_rtx (tmp), 1, OPTAB_DIRECT);
17187 else if (diff == -1 && ct)
17197 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17199 tmp = expand_simple_binop (mode, PLUS,
17200 copy_rtx (tmp), GEN_INT (cf),
17201 copy_rtx (tmp), 1, OPTAB_DIRECT);
17209 * andl cf - ct, dest
17219 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17222 tmp = expand_simple_binop (mode, AND,
17224 gen_int_mode (cf - ct, mode),
17225 copy_rtx (tmp), 1, OPTAB_DIRECT);
17227 tmp = expand_simple_binop (mode, PLUS,
17228 copy_rtx (tmp), GEN_INT (ct),
17229 copy_rtx (tmp), 1, OPTAB_DIRECT);
17232 if (!rtx_equal_p (tmp, out))
17233 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
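/* Added worked example: for "x = (a < b) ? 7 : 3" (unsigned) the path
   above comes out branch-free, roughly:

     cmpl  %ebx, %eax
     sbbl  %eax, %eax        # eax = (a < b) ? -1 : 0
     andl  $4, %eax          # mask with cf - ct == 7 - 3
     addl  $3, %eax          # 7 if a < b, else 3

   (illustrative; the exact sequence depends on the constants and on
   the GEU normalization performed above).  */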
17240 enum machine_mode cmp_mode = GET_MODE (op0);
17243 tmp = ct, ct = cf, cf = tmp;
17246 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17248 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17250 /* We may be reversing an unordered compare to a normal compare, which
17251 is not valid in general (we may convert a non-trapping condition
17252 to a trapping one); however, on i386 we currently emit all
17253 comparisons unordered. */
17254 compare_code = reverse_condition_maybe_unordered (compare_code);
17255 code = reverse_condition_maybe_unordered (code);
17259 compare_code = reverse_condition (compare_code);
17260 code = reverse_condition (code);
17264 compare_code = UNKNOWN;
17265 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17266 && CONST_INT_P (op1))
17268 if (op1 == const0_rtx
17269 && (code == LT || code == GE))
17270 compare_code = code;
17271 else if (op1 == constm1_rtx)
17275 else if (code == GT)
17280 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17281 if (compare_code != UNKNOWN
17282 && GET_MODE (op0) == GET_MODE (out)
17283 && (cf == -1 || ct == -1))
17285 /* If lea code below could be used, only optimize
17286 if it results in a 2 insn sequence. */
17288 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17289 || diff == 3 || diff == 5 || diff == 9)
17290 || (compare_code == LT && ct == -1)
17291 || (compare_code == GE && cf == -1))
17294 * notl op1 (if necessary)
17302 code = reverse_condition (code);
17305 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17307 out = expand_simple_binop (mode, IOR,
17309 out, 1, OPTAB_DIRECT);
17310 if (out != operands[0])
17311 emit_move_insn (operands[0], out);
17318 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17319 || diff == 3 || diff == 5 || diff == 9)
17320 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17322 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17328 * lea cf(dest*(ct-cf)),dest
17332 * This also catches the degenerate setcc-only case.
17338 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17341 /* On x86_64 the lea instruction operates on Pmode, so we need
17342 to do the arithmetic in the proper mode to match. */
17344 tmp = copy_rtx (out);
17348 out1 = copy_rtx (out);
17349 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17353 tmp = gen_rtx_PLUS (mode, tmp, out1);
17359 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17362 if (!rtx_equal_p (tmp, out))
17365 out = force_operand (tmp, copy_rtx (out));
17367 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17369 if (!rtx_equal_p (out, operands[0]))
17370 emit_move_insn (operands[0], copy_rtx (out));
17376 * General case: Jumpful:
17377 * xorl dest,dest cmpl op1, op2
17378 * cmpl op1, op2 movl ct, dest
17379 * setcc dest jcc 1f
17380 * decl dest movl cf, dest
17381 * andl (cf-ct),dest 1:
17384 * Size 20. Size 14.
17386 * This is reasonably steep, but branch mispredict costs are
17387 * high on modern cpus, so consider failing only if optimizing for space.
17391 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17392 && BRANCH_COST (optimize_insn_for_speed_p (),
17397 enum machine_mode cmp_mode = GET_MODE (op0);
17402 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17404 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17406 /* We may be reversing an unordered compare to a normal compare,
17407 which is not valid in general (we may convert a non-trapping
17408 condition to a trapping one); however, on i386 we currently
17409 emit all comparisons unordered. */
17410 code = reverse_condition_maybe_unordered (code);
17414 code = reverse_condition (code);
17415 if (compare_code != UNKNOWN)
17416 compare_code = reverse_condition (compare_code);
17420 if (compare_code != UNKNOWN)
17422 /* notl op1 (if needed)
17427 For x < 0 (resp. x <= -1) there will be no notl,
17428 so if possible swap the constants to get rid of the complement.
17430 True/false will be -1/0 while code below (store flag
17431 followed by decrement) is 0/-1, so the constants need
17432 to be exchanged once more. */
17434 if (compare_code == GE || !cf)
17436 code = reverse_condition (code);
17441 HOST_WIDE_INT tmp = cf;
17446 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17450 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17452 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17454 copy_rtx (out), 1, OPTAB_DIRECT);
17457 out = expand_simple_binop (mode, AND, copy_rtx (out),
17458 gen_int_mode (cf - ct, mode),
17459 copy_rtx (out), 1, OPTAB_DIRECT);
17461 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17462 copy_rtx (out), 1, OPTAB_DIRECT);
17463 if (!rtx_equal_p (out, operands[0]))
17464 emit_move_insn (operands[0], copy_rtx (out));
17470 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17472 /* Try a few more things with specific constants and a variable. */
17475 rtx var, orig_out, out, tmp;
17477 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17480 /* If one of the two operands is an interesting constant, load a
17481 constant with the above and mask it in with a logical operation. */
17483 if (CONST_INT_P (operands[2]))
17486 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17487 operands[3] = constm1_rtx, op = and_optab;
17488 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17489 operands[3] = const0_rtx, op = ior_optab;
17493 else if (CONST_INT_P (operands[3]))
17496 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17497 operands[2] = constm1_rtx, op = and_optab;
17498 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
17499 operands[2] = const0_rtx, op = ior_optab;
17506 orig_out = operands[0];
17507 tmp = gen_reg_rtx (mode);
17510 /* Recurse to get the constant loaded. */
17511 if (ix86_expand_int_movcc (operands) == 0)
17514 /* Mask in the interesting variable. */
17515 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17517 if (!rtx_equal_p (out, orig_out))
17518 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17524 * For comparison with above,
17534 if (! nonimmediate_operand (operands[2], mode))
17535 operands[2] = force_reg (mode, operands[2]);
17536 if (! nonimmediate_operand (operands[3], mode))
17537 operands[3] = force_reg (mode, operands[3]);
17539 if (! register_operand (operands[2], VOIDmode)
17541 || ! register_operand (operands[3], VOIDmode)))
17542 operands[2] = force_reg (mode, operands[2]);
17545 && ! register_operand (operands[3], VOIDmode))
17546 operands[3] = force_reg (mode, operands[3]);
17548 emit_insn (compare_seq);
17549 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17550 gen_rtx_IF_THEN_ELSE (mode,
17551 compare_op, operands[2],
17556 /* Swap, force into registers, or otherwise massage the two operands
17557 to an sse comparison with a mask result. Thus we differ a bit from
17558 ix86_prepare_fp_compare_args which expects to produce a flags result.
17560 The DEST operand exists to help determine whether to commute commutative
17561 operators. The POP0/POP1 operands are updated in place. The new
17562 comparison code is returned, or UNKNOWN if not implementable. */
17564 static enum rtx_code
17565 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17566 rtx *pop0, rtx *pop1)
17574 /* We have no LTGT as an operator. We could implement it with
17575 NE & ORDERED, but this requires an extra temporary. It's
17576 not clear that it's worth it. */
17583 /* These are supported directly. */
17590 /* For commutative operators, try to canonicalize the destination
17591 operand to be first in the comparison - this helps reload to
17592 avoid extra moves. */
17593 if (!dest || !rtx_equal_p (dest, *pop1))
17601 /* These are not supported directly. Swap the comparison operands
17602 to transform into something that is supported. */
17606 code = swap_condition (code);
17610 gcc_unreachable ();
17616 /* Detect conditional moves that exactly match min/max operational
17617 semantics. Note that this is IEEE safe, as long as we don't
17618 interchange the operands.
17620 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17621 and TRUE if the operation is successful and instructions are emitted. */
17624 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17625 rtx cmp_op1, rtx if_true, rtx if_false)
17627 enum machine_mode mode;
17633 else if (code == UNGE)
17636 if_true = if_false;
17642 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17644 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17649 mode = GET_MODE (dest);
17651 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17652 but MODE may be a vector mode and thus not appropriate. */
17653 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17655 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17658 if_true = force_reg (mode, if_true);
17659 v = gen_rtvec (2, if_true, if_false);
17660 tmp = gen_rtx_UNSPEC (mode, v, u);
17664 code = is_min ? SMIN : SMAX;
17665 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17668 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
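/* Added note: e.g. "x < y ? x : y" maps straight to minss/minsd with
   x in the destination; those insns return the second operand when the
   compare is unordered or the operands compare equal, which is why the
   operand order checked above must not be interchanged.  */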
17672 /* Expand an sse vector comparison. Return the register with the result. */
17675 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17676 rtx op_true, rtx op_false)
17678 enum machine_mode mode = GET_MODE (dest);
17681 cmp_op0 = force_reg (mode, cmp_op0);
17682 if (!nonimmediate_operand (cmp_op1, mode))
17683 cmp_op1 = force_reg (mode, cmp_op1);
17686 || reg_overlap_mentioned_p (dest, op_true)
17687 || reg_overlap_mentioned_p (dest, op_false))
17688 dest = gen_reg_rtx (mode);
17690 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17691 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17696 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17697 operations. This is used for both scalar and vector conditional moves. */
17700 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17702 enum machine_mode mode = GET_MODE (dest);
17705 if (op_false == CONST0_RTX (mode))
17707 op_true = force_reg (mode, op_true);
17708 x = gen_rtx_AND (mode, cmp, op_true);
17709 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17711 else if (op_true == CONST0_RTX (mode))
17713 op_false = force_reg (mode, op_false);
17714 x = gen_rtx_NOT (mode, cmp);
17715 x = gen_rtx_AND (mode, x, op_false);
17716 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17718 else if (TARGET_XOP)
17720 rtx pcmov = gen_rtx_SET (mode, dest,
17721 gen_rtx_IF_THEN_ELSE (mode, cmp,
17728 op_true = force_reg (mode, op_true);
17729 op_false = force_reg (mode, op_false);
17731 t2 = gen_reg_rtx (mode);
17733 t3 = gen_reg_rtx (mode);
17737 x = gen_rtx_AND (mode, op_true, cmp);
17738 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17740 x = gen_rtx_NOT (mode, cmp);
17741 x = gen_rtx_AND (mode, x, op_false);
17742 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17744 x = gen_rtx_IOR (mode, t3, t2);
17745 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
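/* Illustrative sketch (not part of the original file): the AND/ANDN/IOR
   sequence built above is a bitwise select, which is correct because
   CMP is an all-ones or all-zeros mask per element.  */
#if 0
#include <stdint.h>

static uint32_t
bitwise_select (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  /* dest = (op_true & cmp) | (op_false & ~cmp).  */
  return (op_true & cmp) | (op_false & ~cmp);
}
#endif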
17749 /* Expand a floating-point conditional move. Return true if successful. */
17752 ix86_expand_fp_movcc (rtx operands[])
17754 enum machine_mode mode = GET_MODE (operands[0]);
17755 enum rtx_code code = GET_CODE (operands[1]);
17756 rtx tmp, compare_op;
17757 rtx op0 = XEXP (operands[1], 0);
17758 rtx op1 = XEXP (operands[1], 1);
17760 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17762 enum machine_mode cmode;
17764 /* Since we have no cmove for sse registers, don't force bad register
17765 allocation just to gain access to it. Deny movcc when the
17766 comparison mode doesn't match the move mode. */
17767 cmode = GET_MODE (op0);
17768 if (cmode == VOIDmode)
17769 cmode = GET_MODE (op1);
17773 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17774 if (code == UNKNOWN)
17777 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17778 operands[2], operands[3]))
17781 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17782 operands[2], operands[3]);
17783 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17787 /* The floating point conditional move instructions don't directly
17788 support conditions resulting from a signed integer comparison. */
17790 compare_op = ix86_expand_compare (code, op0, op1);
17791 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17793 tmp = gen_reg_rtx (QImode);
17794 ix86_expand_setcc (tmp, code, op0, op1);
17796 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17799 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17800 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17801 operands[2], operands[3])));
17806 /* Expand a floating-point vector conditional move; a vcond operation
17807 rather than a movcc operation. */
17810 ix86_expand_fp_vcond (rtx operands[])
17812 enum rtx_code code = GET_CODE (operands[3]);
17815 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17816 &operands[4], &operands[5]);
17817 if (code == UNKNOWN)
17820 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17821 operands[5], operands[1], operands[2]))
17824 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17825 operands[1], operands[2]);
17826 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17830 /* Expand a signed/unsigned integral vector conditional move. */
17833 ix86_expand_int_vcond (rtx operands[])
17835 enum machine_mode mode = GET_MODE (operands[0]);
17836 enum rtx_code code = GET_CODE (operands[3]);
17837 bool negate = false;
17840 cop0 = operands[4];
17841 cop1 = operands[5];
17843 /* XOP supports all of the comparisons on all vector int types. */
17846 /* Canonicalize the comparison to EQ, GT, GTU. */
17857 code = reverse_condition (code);
17863 code = reverse_condition (code);
17869 code = swap_condition (code);
17870 x = cop0, cop0 = cop1, cop1 = x;
17874 gcc_unreachable ();
17877 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17878 if (mode == V2DImode)
17883 /* SSE4.1 supports EQ. */
17884 if (!TARGET_SSE4_1)
17890 /* SSE4.2 supports GT/GTU. */
17891 if (!TARGET_SSE4_2)
17896 gcc_unreachable ();
17900 /* Unsigned parallel compare is not supported by the hardware.
17901 Play some tricks to turn this into a signed comparison
17902 against 0. */
17905 cop0 = force_reg (mode, cop0);
17913 rtx (*gen_sub3) (rtx, rtx, rtx);
17915 /* Subtract (-(INT MAX) - 1) from both operands to make
17916 them signed. */
17917 mask = ix86_build_signbit_mask (mode, true, false);
17918 gen_sub3 = (mode == V4SImode
17919 ? gen_subv4si3 : gen_subv2di3);
17920 t1 = gen_reg_rtx (mode);
17921 emit_insn (gen_sub3 (t1, cop0, mask));
17923 t2 = gen_reg_rtx (mode);
17924 emit_insn (gen_sub3 (t2, cop1, mask));
17934 /* Perform a parallel unsigned saturating subtraction. */
17935 x = gen_reg_rtx (mode);
17936 emit_insn (gen_rtx_SET (VOIDmode, x,
17937 gen_rtx_US_MINUS (mode, cop0, cop1)));
17940 cop1 = CONST0_RTX (mode);
17946 gcc_unreachable ();
17951 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17952 operands[1+negate], operands[2-negate]);
17954 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17955 operands[2-negate]);
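/* Illustrative sketch (not part of the original file): the two tricks
   used above to synthesize an unsigned vector compare.  Biasing both
   operands by the sign bit turns GTU into signed GT; alternatively an
   unsigned saturating subtraction is nonzero exactly when a >u b.  */
#if 0
#include <stdint.h>

static int
gtu_via_bias (uint32_t a, uint32_t b)
{
  /* (a - 0x80000000) >s (b - 0x80000000)  ==  a >u b.  */
  return (int32_t) (a - 0x80000000u) > (int32_t) (b - 0x80000000u);
}

static int
gtu_via_us_minus (uint8_t a, uint8_t b)
{
  /* US_MINUS (a, b) saturates at 0, so it is nonzero iff a >u b.  */
  uint8_t sat = a > b ? (uint8_t) (a - b) : 0;
  return sat != 0;
}
#endif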
17959 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17960 true if we should do zero extension, else sign extension. HIGH_P is
17961 true if we want the N/2 high elements, else the low elements. */
17964 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17966 enum machine_mode imode = GET_MODE (operands[1]);
17967 rtx (*unpack)(rtx, rtx, rtx);
17974 unpack = gen_vec_interleave_highv16qi;
17976 unpack = gen_vec_interleave_lowv16qi;
17980 unpack = gen_vec_interleave_highv8hi;
17982 unpack = gen_vec_interleave_lowv8hi;
17986 unpack = gen_vec_interleave_highv4si;
17988 unpack = gen_vec_interleave_lowv4si;
17991 gcc_unreachable ();
17994 dest = gen_lowpart (imode, operands[0]);
17997 se = force_reg (imode, CONST0_RTX (imode));
17999 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18000 operands[1], pc_rtx, pc_rtx);
18002 emit_insn (unpack (dest, operands[1], se));
18005 /* This function performs the same task as ix86_expand_sse_unpack,
18006 but with SSE4.1 instructions. */
18009 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18011 enum machine_mode imode = GET_MODE (operands[1]);
18012 rtx (*unpack)(rtx, rtx);
18019 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18021 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18025 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18027 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18031 unpack = gen_sse4_1_zero_extendv2siv2di2;
18033 unpack = gen_sse4_1_sign_extendv2siv2di2;
18036 gcc_unreachable ();
18039 dest = operands[0];
18042 /* Shift higher 8 bytes to lower 8 bytes. */
18043 src = gen_reg_rtx (imode);
18044 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18045 gen_lowpart (V1TImode, operands[1]),
18051 emit_insn (unpack (dest, src));
18054 /* Expand conditional increment or decrement using adc/sbb instructions.
18055 The default case using setcc followed by the conditional move can be
18056 done by generic code. */
18058 ix86_expand_int_addcc (rtx operands[])
18060 enum rtx_code code = GET_CODE (operands[1]);
18062 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18064 rtx val = const0_rtx;
18065 bool fpcmp = false;
18066 enum machine_mode mode;
18067 rtx op0 = XEXP (operands[1], 0);
18068 rtx op1 = XEXP (operands[1], 1);
18070 if (operands[3] != const1_rtx
18071 && operands[3] != constm1_rtx)
18073 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18075 code = GET_CODE (compare_op);
18077 flags = XEXP (compare_op, 0);
18079 if (GET_MODE (flags) == CCFPmode
18080 || GET_MODE (flags) == CCFPUmode)
18083 code = ix86_fp_compare_code_to_integer (code);
18090 PUT_CODE (compare_op,
18091 reverse_condition_maybe_unordered
18092 (GET_CODE (compare_op)));
18094 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18097 mode = GET_MODE (operands[0]);
18099 /* Construct either adc or sbb insn. */
18100 if ((code == LTU) == (operands[3] == constm1_rtx))
18105 insn = gen_subqi3_carry;
18108 insn = gen_subhi3_carry;
18111 insn = gen_subsi3_carry;
18114 insn = gen_subdi3_carry;
18117 gcc_unreachable ();
18125 insn = gen_addqi3_carry;
18128 insn = gen_addhi3_carry;
18131 insn = gen_addsi3_carry;
18134 insn = gen_adddi3_carry;
18137 gcc_unreachable ();
18140 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
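/* Illustrative sketch (not part of the original file): the carry-flag
   identity behind the adc/sbb sequences chosen above.  An unsigned
   compare leaves a <u b in the carry flag, so a conditional increment
   or decrement needs neither a branch nor a cmove.  */
#if 0
#include <stdint.h>

static uint32_t
cond_increment (uint32_t x, uint32_t a, uint32_t b)
{
  /* Roughly:  cmpl %b, %a ; adcl $0, %x  */
  return x + (a < b);
}

static uint32_t
cond_decrement (uint32_t x, uint32_t a, uint32_t b)
{
  /* Roughly:  cmpl %b, %a ; sbbl $0, %x  */
  return x - (a < b);
}
#endif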
18146 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18147 but works for floating point parameters and non-offsettable memories.
18148 For pushes, it returns just stack offsets; the values will be saved
18149 in the right order. At most four parts are generated. */
18152 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18157 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18159 size = (GET_MODE_SIZE (mode) + 4) / 8;
18161 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18162 gcc_assert (size >= 2 && size <= 4);
18164 /* Optimize constant pool reference to immediates. This is used by fp
18165 moves, which force all constants to memory to allow combining. */
18166 if (MEM_P (operand) && MEM_READONLY_P (operand))
18168 rtx tmp = maybe_get_pool_constant (operand);
18173 if (MEM_P (operand) && !offsettable_memref_p (operand))
18175 /* The only non-offsettable memories we handle are pushes. */
18176 int ok = push_operand (operand, VOIDmode);
18180 operand = copy_rtx (operand);
18181 PUT_MODE (operand, Pmode);
18182 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18186 if (GET_CODE (operand) == CONST_VECTOR)
18188 enum machine_mode imode = int_mode_for_mode (mode);
18189 /* Caution: if we looked through a constant pool memory above,
18190 the operand may actually have a different mode now. That's
18191 ok, since we want to pun this all the way back to an integer. */
18192 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18193 gcc_assert (operand != NULL);
18199 if (mode == DImode)
18200 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18205 if (REG_P (operand))
18207 gcc_assert (reload_completed);
18208 for (i = 0; i < size; i++)
18209 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18211 else if (offsettable_memref_p (operand))
18213 operand = adjust_address (operand, SImode, 0);
18214 parts[0] = operand;
18215 for (i = 1; i < size; i++)
18216 parts[i] = adjust_address (operand, SImode, 4 * i);
18218 else if (GET_CODE (operand) == CONST_DOUBLE)
18223 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18227 real_to_target (l, &r, mode);
18228 parts[3] = gen_int_mode (l[3], SImode);
18229 parts[2] = gen_int_mode (l[2], SImode);
18232 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18233 parts[2] = gen_int_mode (l[2], SImode);
18236 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18239 gcc_unreachable ();
18241 parts[1] = gen_int_mode (l[1], SImode);
18242 parts[0] = gen_int_mode (l[0], SImode);
18245 gcc_unreachable ();
18250 if (mode == TImode)
18251 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18252 if (mode == XFmode || mode == TFmode)
18254 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18255 if (REG_P (operand))
18257 gcc_assert (reload_completed);
18258 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18259 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18261 else if (offsettable_memref_p (operand))
18263 operand = adjust_address (operand, DImode, 0);
18264 parts[0] = operand;
18265 parts[1] = adjust_address (operand, upper_mode, 8);
18267 else if (GET_CODE (operand) == CONST_DOUBLE)
18272 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18273 real_to_target (l, &r, mode);
18275 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
18276 if (HOST_BITS_PER_WIDE_INT >= 64)
18279 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18280 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18283 parts[0] = immed_double_const (l[0], l[1], DImode);
18285 if (upper_mode == SImode)
18286 parts[1] = gen_int_mode (l[2], SImode);
18287 else if (HOST_BITS_PER_WIDE_INT >= 64)
18290 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18291 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18294 parts[1] = immed_double_const (l[2], l[3], DImode);
18297 gcc_unreachable ();
18304 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18305 Return false when normal moves are needed; true when all required
18306 insns have been emitted. Operands 2-4 contain the input values
18307 in the correct order; operands 5-7 contain the output values. */
18310 ix86_split_long_move (rtx operands[])
18315 int collisions = 0;
18316 enum machine_mode mode = GET_MODE (operands[0]);
18317 bool collisionparts[4];
18319 /* The DFmode expanders may ask us to move a double.
18320 For a 64-bit target this is a single move. By hiding the fact
18321 here we simplify the i386.md splitters. */
18322 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18324 /* Optimize constant pool reference to immediates. This is used by
18325 fp moves, that force all constants to memory to allow combining. */
18327 if (MEM_P (operands[1])
18328 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18329 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18330 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18331 if (push_operand (operands[0], VOIDmode))
18333 operands[0] = copy_rtx (operands[0]);
18334 PUT_MODE (operands[0], Pmode);
18337 operands[0] = gen_lowpart (DImode, operands[0]);
18338 operands[1] = gen_lowpart (DImode, operands[1]);
18339 emit_move_insn (operands[0], operands[1]);
18343 /* The only non-offsettable memory we handle is a push. */
18344 if (push_operand (operands[0], VOIDmode))
18347 gcc_assert (!MEM_P (operands[0])
18348 || offsettable_memref_p (operands[0]));
18350 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18351 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18353 /* When emitting a push, take care of source operands on the stack. */
18354 if (push && MEM_P (operands[1])
18355 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18357 rtx src_base = XEXP (part[1][nparts - 1], 0);
18359 /* Compensate for the stack decrement by 4. */
18360 if (!TARGET_64BIT && nparts == 3
18361 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18362 src_base = plus_constant (src_base, 4);
18364 /* src_base refers to the stack pointer and is
18365 automatically decreased by the emitted push. */
18366 for (i = 0; i < nparts; i++)
18367 part[1][i] = change_address (part[1][i],
18368 GET_MODE (part[1][i]), src_base);
18371 /* We need to do the copy in the right order in case an address register
18372 of the source overlaps the destination. */
18373 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18377 for (i = 0; i < nparts; i++)
18380 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18381 if (collisionparts[i])
18385 /* Collision in the middle part can be handled by reordering. */
18386 if (collisions == 1 && nparts == 3 && collisionparts [1])
18388 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18389 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18391 else if (collisions == 1
18393 && (collisionparts [1] || collisionparts [2]))
18395 if (collisionparts [1])
18397 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18398 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18402 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18403 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18407 /* If there are more collisions, we can't handle it by reordering.
18408 Do an lea to the last part and use only one colliding move. */
18409 else if (collisions > 1)
18415 base = part[0][nparts - 1];
18417 /* Handle the case when the last part isn't valid for lea.
18418 This happens in 64-bit mode when storing the 12-byte XFmode. */
18419 if (GET_MODE (base) != Pmode)
18420 base = gen_rtx_REG (Pmode, REGNO (base));
18422 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18423 part[1][0] = replace_equiv_address (part[1][0], base);
18424 for (i = 1; i < nparts; i++)
18426 tmp = plus_constant (base, UNITS_PER_WORD * i);
18427 part[1][i] = replace_equiv_address (part[1][i], tmp);
18438 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18439 emit_insn (gen_addsi3 (stack_pointer_rtx,
18440 stack_pointer_rtx, GEN_INT (-4)));
18441 emit_move_insn (part[0][2], part[1][2]);
18443 else if (nparts == 4)
18445 emit_move_insn (part[0][3], part[1][3]);
18446 emit_move_insn (part[0][2], part[1][2]);
18451 /* In 64-bit mode we don't have a 32-bit push available. In case this is
18452 a register, it is OK - we will just use the larger counterpart. We also
18453 retype the memory - this comes from an attempt to avoid the REX prefix
18454 on moving the second half of a TFmode value. */
18455 if (GET_MODE (part[1][1]) == SImode)
18457 switch (GET_CODE (part[1][1]))
18460 part[1][1] = adjust_address (part[1][1], DImode, 0);
18464 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18468 gcc_unreachable ();
18471 if (GET_MODE (part[1][0]) == SImode)
18472 part[1][0] = part[1][1];
18475 emit_move_insn (part[0][1], part[1][1]);
18476 emit_move_insn (part[0][0], part[1][0]);
18480 /* Choose the correct order so as not to overwrite the source before it is copied. */
18481 if ((REG_P (part[0][0])
18482 && REG_P (part[1][1])
18483 && (REGNO (part[0][0]) == REGNO (part[1][1])
18485 && REGNO (part[0][0]) == REGNO (part[1][2]))
18487 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18489 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18491 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18493 operands[2 + i] = part[0][j];
18494 operands[6 + i] = part[1][j];
18499 for (i = 0; i < nparts; i++)
18501 operands[2 + i] = part[0][i];
18502 operands[6 + i] = part[1][i];
18506 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18507 if (optimize_insn_for_size_p ())
18509 for (j = 0; j < nparts - 1; j++)
18510 if (CONST_INT_P (operands[6 + j])
18511 && operands[6 + j] != const0_rtx
18512 && REG_P (operands[2 + j]))
18513 for (i = j; i < nparts - 1; i++)
18514 if (CONST_INT_P (operands[7 + i])
18515 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18516 operands[7 + i] = operands[2 + j];
18519 for (i = 0; i < nparts; i++)
18520 emit_move_insn (operands[2 + i], operands[6 + i]);
18525 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18526 left shift by a constant, either using a single shift or
18527 a sequence of add instructions. */
18530 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18532 rtx (*insn)(rtx, rtx, rtx);
18535 || (count * ix86_cost->add <= ix86_cost->shift_const
18536 && !optimize_insn_for_size_p ()))
18538 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
18539 while (count-- > 0)
18540 emit_insn (insn (operand, operand, operand));
18544 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18545 emit_insn (insn (operand, operand, GEN_INT (count)));
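/* Illustrative sketch (not part of the original file): the choice made
   above.  A left shift by a small constant COUNT can be a chain of
   COUNT self-additions, each doubling the value; this wins when COUNT
   additions cost less than one shift-by-constant on the target.  */
#if 0
static unsigned int
shl_via_adds (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;	/* same as x <<= 1 */
  return x;
}
#endif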
18550 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18552 rtx (*gen_ashl3)(rtx, rtx, rtx);
18553 rtx (*gen_shld)(rtx, rtx, rtx);
18554 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18556 rtx low[2], high[2];
18559 if (CONST_INT_P (operands[2]))
18561 split_double_mode (mode, operands, 2, low, high);
18562 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18564 if (count >= half_width)
18566 emit_move_insn (high[0], low[1]);
18567 emit_move_insn (low[0], const0_rtx);
18569 if (count > half_width)
18570 ix86_expand_ashl_const (high[0], count - half_width, mode);
18574 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18576 if (!rtx_equal_p (operands[0], operands[1]))
18577 emit_move_insn (operands[0], operands[1]);
18579 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
18580 ix86_expand_ashl_const (low[0], count, mode);
18585 split_double_mode (mode, operands, 1, low, high);
18587 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18589 if (operands[1] == const1_rtx)
18591 /* Assuming we've chosen QImode-capable registers, then 1 << N
18592 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18593 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18595 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18597 ix86_expand_clear (low[0]);
18598 ix86_expand_clear (high[0]);
18599 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
18601 d = gen_lowpart (QImode, low[0]);
18602 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18603 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18604 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18606 d = gen_lowpart (QImode, high[0]);
18607 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18608 s = gen_rtx_NE (QImode, flags, const0_rtx);
18609 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18612 /* Otherwise, we can get the same results by manually performing
18613 a bit extract operation on bit 5/6, and then performing the two
18614 shifts. The two methods of getting 0/1 into low/high are exactly
18615 the same size. Avoiding the shift in the bit extract case helps
18616 Pentium 4 a bit; no one else seems to care much either way. */
18619 enum machine_mode half_mode;
18620 rtx (*gen_lshr3)(rtx, rtx, rtx);
18621 rtx (*gen_and3)(rtx, rtx, rtx);
18622 rtx (*gen_xor3)(rtx, rtx, rtx);
18623 HOST_WIDE_INT bits;
18626 if (mode == DImode)
18628 half_mode = SImode;
18629 gen_lshr3 = gen_lshrsi3;
18630 gen_and3 = gen_andsi3;
18631 gen_xor3 = gen_xorsi3;
18636 half_mode = DImode;
18637 gen_lshr3 = gen_lshrdi3;
18638 gen_and3 = gen_anddi3;
18639 gen_xor3 = gen_xordi3;
18643 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18644 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
18646 x = gen_lowpart (half_mode, operands[2]);
18647 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18649 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
18650 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
18651 emit_move_insn (low[0], high[0]);
18652 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
18655 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18656 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
18660 if (operands[1] == constm1_rtx)
18662 /* For -1 << N, we can avoid the shld instruction, because we
18663 know that we're shifting 0...31/63 ones into a -1. */
18664 emit_move_insn (low[0], constm1_rtx);
18665 if (optimize_insn_for_size_p ())
18666 emit_move_insn (high[0], low[0]);
18668 emit_move_insn (high[0], constm1_rtx);
18672 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18674 if (!rtx_equal_p (operands[0], operands[1]))
18675 emit_move_insn (operands[0], operands[1]);
18677 split_double_mode (mode, operands, 1, low, high);
18678 emit_insn (gen_shld (high[0], low[0], operands[2]));
18681 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18683 if (TARGET_CMOVE && scratch)
18685 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18686 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18688 ix86_expand_clear (scratch);
18689 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
18693 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18694 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18696 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
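/* Illustrative sketch (not part of the original file): the double-word
   left shift the splitter above emits, written out for a 64-bit value
   held in two 32-bit halves.  SHLD produces the HIGH update; the
   adjustment step handles counts >= 32, where the hardware shift
   amount is taken modulo the word size.  */
#if 0
#include <stdint.h>

static void
double_word_ashl (uint32_t *low, uint32_t *high, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *high = *low << (count - 32);	/* the adjust step: high = low, low = 0 */
      *low = 0;
    }
  else if (count > 0)
    {
      /* shld: high = (high << count) | (low >> (32 - count)).  */
      *high = (*high << count) | (*low >> (32 - count));
      *low <<= count;
    }
}
#endif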
18701 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18703 rtx (*gen_ashr3)(rtx, rtx, rtx)
18704 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
18705 rtx (*gen_shrd)(rtx, rtx, rtx);
18706 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18708 rtx low[2], high[2];
18711 if (CONST_INT_P (operands[2]))
18713 split_double_mode (mode, operands, 2, low, high);
18714 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18716 if (count == GET_MODE_BITSIZE (mode) - 1)
18718 emit_move_insn (high[0], high[1]);
18719 emit_insn (gen_ashr3 (high[0], high[0],
18720 GEN_INT (half_width - 1)));
18721 emit_move_insn (low[0], high[0]);
18724 else if (count >= half_width)
18726 emit_move_insn (low[0], high[1]);
18727 emit_move_insn (high[0], low[0]);
18728 emit_insn (gen_ashr3 (high[0], high[0],
18729 GEN_INT (half_width - 1)));
18731 if (count > half_width)
18732 emit_insn (gen_ashr3 (low[0], low[0],
18733 GEN_INT (count - half_width)));
18737 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18739 if (!rtx_equal_p (operands[0], operands[1]))
18740 emit_move_insn (operands[0], operands[1]);
18742 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18743 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
18748 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18750 if (!rtx_equal_p (operands[0], operands[1]))
18751 emit_move_insn (operands[0], operands[1]);
18753 split_double_mode (mode, operands, 1, low, high);
18755 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18756 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
18758 if (TARGET_CMOVE && scratch)
18760 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18761 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18763 emit_move_insn (scratch, high[0]);
18764 emit_insn (gen_ashr3 (scratch, scratch,
18765 GEN_INT (half_width - 1)));
18766 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18771 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
18772 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
18774 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
18780 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18782 rtx (*gen_lshr3)(rtx, rtx, rtx)
18783 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
18784 rtx (*gen_shrd)(rtx, rtx, rtx);
18785 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18787 rtx low[2], high[2];
18790 if (CONST_INT_P (operands[2]))
18792 split_double_mode (mode, operands, 2, low, high);
18793 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18795 if (count >= half_width)
18797 emit_move_insn (low[0], high[1]);
18798 ix86_expand_clear (high[0]);
18800 if (count > half_width)
18801 emit_insn (gen_lshr3 (low[0], low[0],
18802 GEN_INT (count - half_width)));
18806 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18808 if (!rtx_equal_p (operands[0], operands[1]))
18809 emit_move_insn (operands[0], operands[1]);
18811 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18812 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
18817 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18819 if (!rtx_equal_p (operands[0], operands[1]))
18820 emit_move_insn (operands[0], operands[1]);
18822 split_double_mode (mode, operands, 1, low, high);
18824 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18825 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
18827 if (TARGET_CMOVE && scratch)
18829 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18830 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18832 ix86_expand_clear (scratch);
18833 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18838 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18839 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18841 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
18846 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18848 predict_jump (int prob)
18850 rtx insn = get_last_insn ();
18851 gcc_assert (JUMP_P (insn));
18852 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18855 /* Helper function for the string operations below. Test VARIABLE to see
18856 whether it is aligned to VALUE bytes. If true, jump to the label. */
18858 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18860 rtx label = gen_label_rtx ();
18861 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18862 if (GET_MODE (variable) == DImode)
18863 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18865 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18866 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18869 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18871 predict_jump (REG_BR_PROB_BASE * 90 / 100);
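/* Illustrative sketch (not part of the original file): the test
   ix86_expand_aligntest emits.  VALUE is a single power-of-two bit;
   the generated AND plus conditional jump skips the following move
   whenever that bit of VARIABLE is clear.  */
#if 0
#include <stdint.h>

static int
aligntest_taken_p (uintptr_t variable, uintptr_t value)
{
  /* The emitted branch is taken (the move is skipped) when zero.  */
  return (variable & value) == 0;
}
#endif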
18875 /* Adjust COUNTREG by VALUE. */
18877 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18879 rtx (*gen_add)(rtx, rtx, rtx)
18880 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
18882 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
18885 /* Zero-extend the possibly SImode EXP to a Pmode register. */
18887 ix86_zero_extend_to_Pmode (rtx exp)
18890 if (GET_MODE (exp) == VOIDmode)
18891 return force_reg (Pmode, exp);
18892 if (GET_MODE (exp) == Pmode)
18893 return copy_to_mode_reg (Pmode, exp);
18894 r = gen_reg_rtx (Pmode);
18895 emit_insn (gen_zero_extendsidi2 (r, exp));
18899 /* Divide COUNTREG by SCALE. */
18901 scale_counter (rtx countreg, int scale)
18907 if (CONST_INT_P (countreg))
18908 return GEN_INT (INTVAL (countreg) / scale);
18909 gcc_assert (REG_P (countreg));
18911 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18912 GEN_INT (exact_log2 (scale)),
18913 NULL, 1, OPTAB_DIRECT);
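/* Illustrative sketch (not part of the original file): SCALE is the
   chunk size of the string operation and always a power of two, so
   the division above becomes a single logical right shift.  */
#if 0
static unsigned long
scale_counter_sketch (unsigned long count, unsigned int scale)
{
  /* __builtin_ctz (scale) == exact_log2 (scale) for powers of two.  */
  return count >> __builtin_ctz (scale);
}
#endif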
18917 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18918 DImode for constant loop counts. */
18920 static enum machine_mode
18921 counter_mode (rtx count_exp)
18923 if (GET_MODE (count_exp) != VOIDmode)
18924 return GET_MODE (count_exp);
18925 if (!CONST_INT_P (count_exp))
18927 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18932 /* When SRCPTR is non-NULL, output a simple loop to move memory from
18933 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, with an
18934 overall size of COUNT bytes. When SRCPTR is NULL, output the
18935 equivalent loop to set memory to VALUE (supposed to be in MODE).
18937 The size is rounded down to a whole number of chunks moved at once.
18938 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
18942 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18943 rtx destptr, rtx srcptr, rtx value,
18944 rtx count, enum machine_mode mode, int unroll,
18947 rtx out_label, top_label, iter, tmp;
18948 enum machine_mode iter_mode = counter_mode (count);
18949 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18950 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18956 top_label = gen_label_rtx ();
18957 out_label = gen_label_rtx ();
18958 iter = gen_reg_rtx (iter_mode);
18960 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18961 NULL, 1, OPTAB_DIRECT);
18962 /* Those two should combine. */
18963 if (piece_size == const1_rtx)
18965 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18967 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18969 emit_move_insn (iter, const0_rtx);
18971 emit_label (top_label);
18973 tmp = convert_modes (Pmode, iter_mode, iter, true);
18974 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18975 destmem = change_address (destmem, mode, x_addr);
18979 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18980 srcmem = change_address (srcmem, mode, y_addr);
18982 /* When unrolling for chips that reorder memory reads and writes,
18983 we can save registers by using a single temporary.
18984 Using 4 temporaries is also overkill in 32-bit mode. */
18985 if (!TARGET_64BIT && 0)
18987 for (i = 0; i < unroll; i++)
18992 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18994 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18996 emit_move_insn (destmem, srcmem);
19002 gcc_assert (unroll <= 4);
19003 for (i = 0; i < unroll; i++)
19005 tmpreg[i] = gen_reg_rtx (mode);
19009 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19011 emit_move_insn (tmpreg[i], srcmem);
19013 for (i = 0; i < unroll; i++)
19018 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19020 emit_move_insn (destmem, tmpreg[i]);
19025 for (i = 0; i < unroll; i++)
19029 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19030 emit_move_insn (destmem, value);
19033 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19034 true, OPTAB_LIB_WIDEN);
19036 emit_move_insn (iter, tmp);
19038 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19040 if (expected_size != -1)
19042 expected_size /= GET_MODE_SIZE (mode) * unroll;
19043 if (expected_size == 0)
19045 else if (expected_size > REG_BR_PROB_BASE)
19046 predict_jump (REG_BR_PROB_BASE - 1);
19048 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19051 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19052 iter = ix86_zero_extend_to_Pmode (iter);
19053 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19054 true, OPTAB_LIB_WIDEN);
19055 if (tmp != destptr)
19056 emit_move_insn (destptr, tmp);
19059 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19060 true, OPTAB_LIB_WIDEN);
19062 emit_move_insn (srcptr, tmp);
19064 emit_label (out_label);
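/* Illustrative sketch (not part of the original file): the shape of
   the loop emitted above, for chunks of SIZE bytes unrolled UNROLL
   times.  COUNT is first rounded down to a multiple of the unrolled
   chunk (assumed here to be a power of two); the remainder is left
   for the epilogue code.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
copy_loop_shape (char *dest, const char *src, size_t count,
		 size_t size, size_t unroll)
{
  size_t piece = size * unroll;
  size_t rounded = count & ~(piece - 1);
  size_t iter;

  for (iter = 0; iter < rounded; iter += piece)
    memcpy (dest + iter, src + iter, piece);	/* UNROLL moves of SIZE */
  /* DESTPTR/SRCPTR are then advanced by ROUNDED before the epilogue.  */
}
#endif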
19067 /* Output a "rep; mov" instruction.
19068 Arguments have the same meaning as for the previous function. */
19070 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19071 rtx destptr, rtx srcptr,
19073 enum machine_mode mode)
19079 /* If the size is known, it is shorter to use rep movs. */
19080 if (mode == QImode && CONST_INT_P (count)
19081 && !(INTVAL (count) & 3))
19084 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19085 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19086 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19087 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19088 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19089 if (mode != QImode)
19091 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19092 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19093 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19094 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19095 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19096 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19100 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19101 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19103 if (CONST_INT_P (count))
19105 count = GEN_INT (INTVAL (count)
19106 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19107 destmem = shallow_copy_rtx (destmem);
19108 srcmem = shallow_copy_rtx (srcmem);
19109 set_mem_size (destmem, count);
19110 set_mem_size (srcmem, count);
19114 if (MEM_SIZE (destmem))
19115 set_mem_size (destmem, NULL_RTX);
19116 if (MEM_SIZE (srcmem))
19117 set_mem_size (srcmem, NULL_RTX);
19119 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19123 /* Output a "rep; stos" instruction.
19124 Arguments have the same meaning as for the previous function. */
19126 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19127 rtx count, enum machine_mode mode,
19133 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19134 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19135 value = force_reg (mode, gen_lowpart (mode, value));
19136 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19137 if (mode != QImode)
19139 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19140 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19141 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19144 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19145 if (orig_value == const0_rtx && CONST_INT_P (count))
19147 count = GEN_INT (INTVAL (count)
19148 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19149 destmem = shallow_copy_rtx (destmem);
19150 set_mem_size (destmem, count);
19152 else if (MEM_SIZE (destmem))
19153 set_mem_size (destmem, NULL_RTX);
19154 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19158 emit_strmov (rtx destmem, rtx srcmem,
19159 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19161 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19162 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19163 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19166 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19168 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19169 rtx destptr, rtx srcptr, rtx count, int max_size)
19172 if (CONST_INT_P (count))
19174 HOST_WIDE_INT countval = INTVAL (count);
19177 if ((countval & 0x10) && max_size > 16)
19181 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19182 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19185 gcc_unreachable ();
19188 if ((countval & 0x08) && max_size > 8)
19191 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19194 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19195 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19199 if ((countval & 0x04) && max_size > 4)
19201 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19204 if ((countval & 0x02) && max_size > 2)
19206 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19209 if ((countval & 0x01) && max_size > 1)
19211 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19218 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19219 count, 1, OPTAB_DIRECT);
19220 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19221 count, QImode, 1, 4);
19225 /* When there are stringops, we can cheaply increase dest and src pointers.
19226 Otherwise we save code size by maintaining an offset (zero is readily
19227 available from the preceding rep operation) and using x86 addressing modes.
19229 if (TARGET_SINGLE_STRINGOP)
19233 rtx label = ix86_expand_aligntest (count, 4, true);
19234 src = change_address (srcmem, SImode, srcptr);
19235 dest = change_address (destmem, SImode, destptr);
19236 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19237 emit_label (label);
19238 LABEL_NUSES (label) = 1;
19242 rtx label = ix86_expand_aligntest (count, 2, true);
19243 src = change_address (srcmem, HImode, srcptr);
19244 dest = change_address (destmem, HImode, destptr);
19245 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19246 emit_label (label);
19247 LABEL_NUSES (label) = 1;
19251 rtx label = ix86_expand_aligntest (count, 1, true);
19252 src = change_address (srcmem, QImode, srcptr);
19253 dest = change_address (destmem, QImode, destptr);
19254 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19255 emit_label (label);
19256 LABEL_NUSES (label) = 1;
19261 rtx offset = force_reg (Pmode, const0_rtx);
19266 rtx label = ix86_expand_aligntest (count, 4, true);
19267 src = change_address (srcmem, SImode, srcptr);
19268 dest = change_address (destmem, SImode, destptr);
19269 emit_move_insn (dest, src);
19270 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19271 true, OPTAB_LIB_WIDEN);
19273 emit_move_insn (offset, tmp);
19274 emit_label (label);
19275 LABEL_NUSES (label) = 1;
19279 rtx label = ix86_expand_aligntest (count, 2, true);
19280 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19281 src = change_address (srcmem, HImode, tmp);
19282 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19283 dest = change_address (destmem, HImode, tmp);
19284 emit_move_insn (dest, src);
19285 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19286 true, OPTAB_LIB_WIDEN);
19288 emit_move_insn (offset, tmp);
19289 emit_label (label);
19290 LABEL_NUSES (label) = 1;
19294 rtx label = ix86_expand_aligntest (count, 1, true);
19295 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19296 src = change_address (srcmem, QImode, tmp);
19297 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19298 dest = change_address (destmem, QImode, tmp);
19299 emit_move_insn (dest, src);
19300 emit_label (label);
19301 LABEL_NUSES (label) = 1;
19306 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19308 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19309 rtx count, int max_size)
19312 expand_simple_binop (counter_mode (count), AND, count,
19313 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19314 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19315 gen_lowpart (QImode, value), count, QImode,
19319 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19321 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19325 if (CONST_INT_P (count))
19327 HOST_WIDE_INT countval = INTVAL (count);
19330 if ((countval & 0x10) && max_size > 16)
19334 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19335 emit_insn (gen_strset (destptr, dest, value));
19336 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19337 emit_insn (gen_strset (destptr, dest, value));
19340 gcc_unreachable ();
19343 if ((countval & 0x08) && max_size > 8)
19347 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19348 emit_insn (gen_strset (destptr, dest, value));
19352 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19353 emit_insn (gen_strset (destptr, dest, value));
19354 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19355 emit_insn (gen_strset (destptr, dest, value));
19359 if ((countval & 0x04) && max_size > 4)
19361 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19362 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19365 if ((countval & 0x02) && max_size > 2)
19367 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19368 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19371 if ((countval & 0x01) && max_size > 1)
19373 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19374 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19381 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19386 rtx label = ix86_expand_aligntest (count, 16, true);
19389 dest = change_address (destmem, DImode, destptr);
19390 emit_insn (gen_strset (destptr, dest, value));
19391 emit_insn (gen_strset (destptr, dest, value));
19395 dest = change_address (destmem, SImode, destptr);
19396 emit_insn (gen_strset (destptr, dest, value));
19397 emit_insn (gen_strset (destptr, dest, value));
19398 emit_insn (gen_strset (destptr, dest, value));
19399 emit_insn (gen_strset (destptr, dest, value));
19401 emit_label (label);
19402 LABEL_NUSES (label) = 1;
19406 rtx label = ix86_expand_aligntest (count, 8, true);
19409 dest = change_address (destmem, DImode, destptr);
19410 emit_insn (gen_strset (destptr, dest, value));
19414 dest = change_address (destmem, SImode, destptr);
19415 emit_insn (gen_strset (destptr, dest, value));
19416 emit_insn (gen_strset (destptr, dest, value));
19418 emit_label (label);
19419 LABEL_NUSES (label) = 1;
19423 rtx label = ix86_expand_aligntest (count, 4, true);
19424 dest = change_address (destmem, SImode, destptr);
19425 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19426 emit_label (label);
19427 LABEL_NUSES (label) = 1;
19431 rtx label = ix86_expand_aligntest (count, 2, true);
19432 dest = change_address (destmem, HImode, destptr);
19433 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19434 emit_label (label);
19435 LABEL_NUSES (label) = 1;
19439 rtx label = ix86_expand_aligntest (count, 1, true);
19440 dest = change_address (destmem, QImode, destptr);
19441 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19442 emit_label (label);
19443 LABEL_NUSES (label) = 1;
19447 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
19448 to DESIRED_ALIGNMENT. */
19450 expand_movmem_prologue (rtx destmem, rtx srcmem,
19451 rtx destptr, rtx srcptr, rtx count,
19452 int align, int desired_alignment)
19454 if (align <= 1 && desired_alignment > 1)
19456 rtx label = ix86_expand_aligntest (destptr, 1, false);
19457 srcmem = change_address (srcmem, QImode, srcptr);
19458 destmem = change_address (destmem, QImode, destptr);
19459 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19460 ix86_adjust_counter (count, 1);
19461 emit_label (label);
19462 LABEL_NUSES (label) = 1;
19464 if (align <= 2 && desired_alignment > 2)
19466 rtx label = ix86_expand_aligntest (destptr, 2, false);
19467 srcmem = change_address (srcmem, HImode, srcptr);
19468 destmem = change_address (destmem, HImode, destptr);
19469 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19470 ix86_adjust_counter (count, 2);
19471 emit_label (label);
19472 LABEL_NUSES (label) = 1;
19474 if (align <= 4 && desired_alignment > 4)
19476 rtx label = ix86_expand_aligntest (destptr, 4, false);
19477 srcmem = change_address (srcmem, SImode, srcptr);
19478 destmem = change_address (destmem, SImode, destptr);
19479 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19480 ix86_adjust_counter (count, 4);
19481 emit_label (label);
19482 LABEL_NUSES (label) = 1;
19484 gcc_assert (desired_alignment <= 8);
19487 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
19488 ALIGN_BYTES is how many bytes need to be copied. */
19490 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19491 int desired_align, int align_bytes)
19494 rtx src_size, dst_size;
19496 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19497 if (src_align_bytes >= 0)
19498 src_align_bytes = desired_align - src_align_bytes;
19499 src_size = MEM_SIZE (src);
19500 dst_size = MEM_SIZE (dst);
19501 if (align_bytes & 1)
19503 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19504 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19506 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19508 if (align_bytes & 2)
19510 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19511 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19512 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19513 set_mem_align (dst, 2 * BITS_PER_UNIT);
19514 if (src_align_bytes >= 0
19515 && (src_align_bytes & 1) == (align_bytes & 1)
19516 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19517 set_mem_align (src, 2 * BITS_PER_UNIT);
19519 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19521 if (align_bytes & 4)
19523 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19524 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19525 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19526 set_mem_align (dst, 4 * BITS_PER_UNIT);
19527 if (src_align_bytes >= 0)
19529 unsigned int src_align = 0;
19530 if ((src_align_bytes & 3) == (align_bytes & 3))
19532 else if ((src_align_bytes & 1) == (align_bytes & 1))
19534 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19535 set_mem_align (src, src_align * BITS_PER_UNIT);
19538 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19540 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19541 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19542 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19543 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19544 if (src_align_bytes >= 0)
19546 unsigned int src_align = 0;
19547 if ((src_align_bytes & 7) == (align_bytes & 7))
19549 else if ((src_align_bytes & 3) == (align_bytes & 3))
19551 else if ((src_align_bytes & 1) == (align_bytes & 1))
19553 if (src_align > (unsigned int) desired_align)
19554 src_align = desired_align;
19555 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19556 set_mem_align (src, src_align * BITS_PER_UNIT);
19559 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19561 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
19566 /* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
19567 to DESIRED_ALIGNMENT. */
19569 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19570 int align, int desired_alignment)
19572 if (align <= 1 && desired_alignment > 1)
19574 rtx label = ix86_expand_aligntest (destptr, 1, false);
19575 destmem = change_address (destmem, QImode, destptr);
19576 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19577 ix86_adjust_counter (count, 1);
19578 emit_label (label);
19579 LABEL_NUSES (label) = 1;
19581 if (align <= 2 && desired_alignment > 2)
19583 rtx label = ix86_expand_aligntest (destptr, 2, false);
19584 destmem = change_address (destmem, HImode, destptr);
19585 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19586 ix86_adjust_counter (count, 2);
19587 emit_label (label);
19588 LABEL_NUSES (label) = 1;
19590 if (align <= 4 && desired_alignment > 4)
19592 rtx label = ix86_expand_aligntest (destptr, 4, false);
19593 destmem = change_address (destmem, SImode, destptr);
19594 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19595 ix86_adjust_counter (count, 4);
19596 emit_label (label);
19597 LABEL_NUSES (label) = 1;
19599 gcc_assert (desired_alignment <= 8);
19602 /* Set enough bytes at DST to align DST, known to be aligned by ALIGN,
19603 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
19605 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19606 int desired_align, int align_bytes)
19609 rtx dst_size = MEM_SIZE (dst);
19610 if (align_bytes & 1)
19612 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19614 emit_insn (gen_strset (destreg, dst,
19615 gen_lowpart (QImode, value)));
19617 if (align_bytes & 2)
19619 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19620 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19621 set_mem_align (dst, 2 * BITS_PER_UNIT);
19623 emit_insn (gen_strset (destreg, dst,
19624 gen_lowpart (HImode, value)));
19626 if (align_bytes & 4)
19628 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19629 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19630 set_mem_align (dst, 4 * BITS_PER_UNIT);
19632 emit_insn (gen_strset (destreg, dst,
19633 gen_lowpart (SImode, value)));
19635 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19636 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19637 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19639 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19643 /* Given COUNT and EXPECTED_SIZE, decide on the codegen of the string operation. */
19644 static enum stringop_alg
19645 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19646 int *dynamic_check)
19648 const struct stringop_algs * algs;
19649 bool optimize_for_speed;
19650 /* Algorithms using the rep prefix want at least edi and ecx;
19651 additionally, memset wants eax and memcpy wants esi. Don't
19652 consider such algorithms if the user has appropriated those
19653 registers for their own purposes. */
19654 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19656 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19658 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19659 || (alg != rep_prefix_1_byte \
19660 && alg != rep_prefix_4_byte \
19661 && alg != rep_prefix_8_byte))
19662 const struct processor_costs *cost;
19664 /* Even if the string operation call is cold, we still might spend a lot
19665 of time processing large blocks. */
19666 if (optimize_function_for_size_p (cfun)
19667 || (optimize_insn_for_size_p ()
19668 && expected_size != -1 && expected_size < 256))
19669 optimize_for_speed = false;
19671 optimize_for_speed = true;
19673 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19675 *dynamic_check = -1;
19677 algs = &cost->memset[TARGET_64BIT != 0];
19679 algs = &cost->memcpy[TARGET_64BIT != 0];
19680 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19681 return stringop_alg;
19682 /* rep; movq or rep; movl is the smallest variant. */
19683 else if (!optimize_for_speed)
19685 if (!count || (count & 3))
19686 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19688 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19690 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
19692 else if (expected_size != -1 && expected_size < 4)
19693 return loop_1_byte;
19694 else if (expected_size != -1)
19697 enum stringop_alg alg = libcall;
19698 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19700 /* We get here if the algorithms that were not libcall-based
19701 were rep-prefix based and we are unable to use rep prefixes
19702 based on global register usage. Break out of the loop and
19703 use the heuristic below. */
19704 if (algs->size[i].max == 0)
19706 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19708 enum stringop_alg candidate = algs->size[i].alg;
19710 if (candidate != libcall && ALG_USABLE_P (candidate))
19712 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19713 the last non-libcall inline algorithm. */
19714 if (TARGET_INLINE_ALL_STRINGOPS)
19716 /* When the current size is best copied by a libcall
19717 but we are still forced to inline, run the heuristic below
19718 that will pick code for medium-sized blocks. */
19719 if (alg != libcall)
19723 else if (ALG_USABLE_P (candidate))
19727 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19729 /* When asked to inline the call anyway, try to pick a meaningful choice.
19730 We look for the maximal size of a block that is faster to copy by hand,
19731 and take blocks of at most that size, guessing that the average size
19732 will be roughly half of the block.
19734 If this turns out to be bad, we might simply specify the preferred
19735 choice in ix86_costs. */
19736 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19737 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19740 enum stringop_alg alg;
19742 bool any_alg_usable_p = true;
19744 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19746 enum stringop_alg candidate = algs->size[i].alg;
19747 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19749 if (candidate != libcall && candidate
19750 && ALG_USABLE_P (candidate))
19751 max = algs->size[i].max;
19753 /* If there aren't any usable algorithms, then recursing on
19754 smaller sizes isn't going to find anything. Just return the
19755 simple byte-at-a-time copy loop. */
19756 if (!any_alg_usable_p)
19758 /* Pick something reasonable. */
19759 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19760 *dynamic_check = 128;
19761 return loop_1_byte;
19765 alg = decide_alg (count, max / 2, memset, dynamic_check);
19766 gcc_assert (*dynamic_check == -1);
19767 gcc_assert (alg != libcall);
19768 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19769 *dynamic_check = max;
19772 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19773 #undef ALG_USABLE_P
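/* Illustrative sketch (not part of the original file): how the
   per-processor stringop_algs tables drive the choice above, using a
   simplified table type (the real code also checks whether each
   candidate is usable at all).  Each entry gives the largest block
   size an algorithm should handle; the first entry whose MAX covers
   the expected size wins, with -1 meaning "unbounded" and a separate
   fallback used when no size estimate exists.  */
#if 0
struct size_alg_sketch { long max; int alg; };

static int
pick_alg_sketch (const struct size_alg_sketch *table, int n,
		 long expected_size, int unknown_size_alg)
{
  int i;

  if (expected_size < 0)
    return unknown_size_alg;
  for (i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;
  return unknown_size_alg;
}
#endif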
19776 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19777 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19779 decide_alignment (int align,
19780 enum stringop_alg alg,
19783 int desired_align = 0;
19787 gcc_unreachable ();
19789 case unrolled_loop:
19790 desired_align = GET_MODE_SIZE (Pmode);
19792 case rep_prefix_8_byte:
19795 case rep_prefix_4_byte:
19796 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19797 copying a whole cache line at once. */
19798 if (TARGET_PENTIUMPRO)
19803 case rep_prefix_1_byte:
19804 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19805 copying a whole cache line at once. */
19806 if (TARGET_PENTIUMPRO)
19820 if (desired_align < align)
19821 desired_align = align;
19822 if (expected_size != -1 && expected_size < 4)
19823 desired_align = align;
19824 return desired_align;
19827 /* Return the smallest power of 2 greater than VAL. */
19829 smallest_pow2_greater_than (int val)
19837 /* Expand string move (memcpy) operation. Use i386 string operations when
19838 profitable. expand_setmem contains similar code. The code depends upon
19839 architecture, block size and alignment, but always has the same
19840 overall structure:
19842 1) Prologue guard: Conditional that jumps up to epilogues for small
19843 blocks that can be handled by epilogue alone. This is faster but
19844 also needed for correctness, since the prologue assumes the block is larger
19845 than the desired alignment.
19847 Optional dynamic check for size and libcall for large
19848 blocks is emitted here too, with -minline-stringops-dynamically.
19850 2) Prologue: copy first few bytes in order to get destination aligned
19851 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19852 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19853 We emit either a jump tree (on power-of-two-sized blocks) or a byte loop.
19855 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19856 with specified algorithm.
19858 4) Epilogue: code copying tail of the block that is too small to be
19859 handled by main body (or up to size guarded by prologue guard). */
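
/* A plain-C picture (illustrative only, not the emitted RTL) of the
   four phases described above.  size_needed and desired_align stand in
   for the values computed in step 0 of the expander.  */
#include <stddef.h>
#include <stdint.h>

static void
movmem_shape (char *dst, const char *src, size_t count,
              size_t size_needed, size_t desired_align)
{
  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (count < size_needed + desired_align)
    goto epilogue;

  /* 2) Prologue: copy bytes until dst reaches the desired alignment.  */
  while ((uintptr_t) dst % desired_align)
    { *dst++ = *src++; count--; }

  /* 3) Main body: copy in size_needed chunks.  */
  while (count >= size_needed)
    {
      size_t i;
      for (i = 0; i < size_needed; i++)
        dst[i] = src[i];
      dst += size_needed; src += size_needed; count -= size_needed;
    }

 epilogue:
  /* 4) Epilogue: the remaining bytes, fewer than one chunk plus the
     alignment slack, copied byte by byte here for simplicity.  */
  while (count--)
    *dst++ = *src++;
}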
19862 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19863 rtx expected_align_exp, rtx expected_size_exp)
19869 rtx jump_around_label = NULL;
19870 HOST_WIDE_INT align = 1;
19871 unsigned HOST_WIDE_INT count = 0;
19872 HOST_WIDE_INT expected_size = -1;
19873 int size_needed = 0, epilogue_size_needed;
19874 int desired_align = 0, align_bytes = 0;
19875 enum stringop_alg alg;
19877 bool need_zero_guard = false;
19879 if (CONST_INT_P (align_exp))
19880 align = INTVAL (align_exp);
19881 /* i386 can do misaligned access at a reasonably increased cost. */
19882 if (CONST_INT_P (expected_align_exp)
19883 && INTVAL (expected_align_exp) > align)
19884 align = INTVAL (expected_align_exp);
19885 /* ALIGN is the minimum of destination and source alignment, but we care here
19886 just about destination alignment. */
19887 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19888 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19890 if (CONST_INT_P (count_exp))
19891 count = expected_size = INTVAL (count_exp);
19892 if (CONST_INT_P (expected_size_exp) && count == 0)
19893 expected_size = INTVAL (expected_size_exp);
19895 /* Make sure we don't need to care about overflow later on. */
19896 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19899 /* Step 0: Decide on preferred algorithm, desired alignment and
19900 size of chunks to be copied by main loop. */
19902 alg = decide_alg (count, expected_size, false, &dynamic_check);
19903 desired_align = decide_alignment (align, alg, expected_size);
19905 if (!TARGET_ALIGN_STRINGOPS)
19906 align = desired_align;
19908 if (alg == libcall)
19910 gcc_assert (alg != no_stringop);
19912 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19913 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19914 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19919 gcc_unreachable ();
19921 need_zero_guard = true;
19922 size_needed = GET_MODE_SIZE (Pmode);
19924 case unrolled_loop:
19925 need_zero_guard = true;
19926 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19928 case rep_prefix_8_byte:
19931 case rep_prefix_4_byte:
19934 case rep_prefix_1_byte:
19938 need_zero_guard = true;
19943 epilogue_size_needed = size_needed;
19945 /* Step 1: Prologue guard. */
19947 /* Alignment code needs count to be in register. */
19948 if (CONST_INT_P (count_exp) && desired_align > align)
19950 if (INTVAL (count_exp) > desired_align
19951 && INTVAL (count_exp) > size_needed)
19954 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19955 if (align_bytes <= 0)
19958 align_bytes = desired_align - align_bytes;
19960 if (align_bytes == 0)
19961 count_exp = force_reg (counter_mode (count_exp), count_exp);
19963 gcc_assert (desired_align >= 1 && align >= 1);
19965 /* Ensure that alignment prologue won't copy past end of block. */
19966 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19968 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19969 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19970 Make sure it is a power of 2. */
19971 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19975 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19977 /* If the main algorithm works on QImode, no epilogue is needed.
19978 For small sizes just don't align anything. */
19979 if (size_needed == 1)
19980 desired_align = align;
19987 label = gen_label_rtx ();
19988 emit_cmp_and_jump_insns (count_exp,
19989 GEN_INT (epilogue_size_needed),
19990 LTU, 0, counter_mode (count_exp), 1, label);
19991 if (expected_size == -1 || expected_size < epilogue_size_needed)
19992 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19994 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19998 /* Emit code to decide at runtime whether a library call or inline code should be
20000 if (dynamic_check != -1)
20002 if (CONST_INT_P (count_exp))
20004 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20006 emit_block_move_via_libcall (dst, src, count_exp, false);
20007 count_exp = const0_rtx;
20013 rtx hot_label = gen_label_rtx ();
20014 jump_around_label = gen_label_rtx ();
20015 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20016 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20017 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20018 emit_block_move_via_libcall (dst, src, count_exp, false);
20019 emit_jump (jump_around_label);
20020 emit_label (hot_label);
20024 /* Step 2: Alignment prologue. */
20026 if (desired_align > align)
20028 if (align_bytes == 0)
20030 /* Except for the first move in the epilogue, we no longer know
20031 the constant offset in aliasing info. It doesn't seem worth
20032 the pain to maintain it for the first move, so throw it away
20034 src = change_address (src, BLKmode, srcreg);
20035 dst = change_address (dst, BLKmode, destreg);
20036 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20041 /* If we know how many bytes need to be stored before dst is
20042 sufficiently aligned, maintain aliasing info accurately. */
20043 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20044 desired_align, align_bytes);
20045 count_exp = plus_constant (count_exp, -align_bytes);
20046 count -= align_bytes;
20048 if (need_zero_guard
20049 && (count < (unsigned HOST_WIDE_INT) size_needed
20050 || (align_bytes == 0
20051 && count < ((unsigned HOST_WIDE_INT) size_needed
20052 + desired_align - align))))
20054 /* It is possible that we copied enough so the main loop will not
20056 gcc_assert (size_needed > 1);
20057 if (label == NULL_RTX)
20058 label = gen_label_rtx ();
20059 emit_cmp_and_jump_insns (count_exp,
20060 GEN_INT (size_needed),
20061 LTU, 0, counter_mode (count_exp), 1, label);
20062 if (expected_size == -1
20063 || expected_size < (desired_align - align) / 2 + size_needed)
20064 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20066 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20069 if (label && size_needed == 1)
20071 emit_label (label);
20072 LABEL_NUSES (label) = 1;
20074 epilogue_size_needed = 1;
20076 else if (label == NULL_RTX)
20077 epilogue_size_needed = size_needed;
20079 /* Step 3: Main loop. */
20085 gcc_unreachable ();
20087 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20088 count_exp, QImode, 1, expected_size);
20091 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20092 count_exp, Pmode, 1, expected_size);
20094 case unrolled_loop:
20095 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20096 registers for 4 temporaries anyway. */
20097 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20098 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20101 case rep_prefix_8_byte:
20102 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20105 case rep_prefix_4_byte:
20106 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20109 case rep_prefix_1_byte:
20110 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20114 /* Properly adjust the offsets of src and dest memory for aliasing. */
20115 if (CONST_INT_P (count_exp))
20117 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20118 (count / size_needed) * size_needed);
20119 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20120 (count / size_needed) * size_needed);
20124 src = change_address (src, BLKmode, srcreg);
20125 dst = change_address (dst, BLKmode, destreg);
20128 /* Step 4: Epilogue to copy the remaining bytes. */
20132 /* When the main loop is done, COUNT_EXP might hold the original count,
20133 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20134 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20135 bytes. Compensate if needed. */
20137 if (size_needed < epilogue_size_needed)
20140 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20141 GEN_INT (size_needed - 1), count_exp, 1,
20143 if (tmp != count_exp)
20144 emit_move_insn (count_exp, tmp);
20146 emit_label (label);
20147 LABEL_NUSES (label) = 1;
20150 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20151 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20152 epilogue_size_needed);
20153 if (jump_around_label)
20154 emit_label (jump_around_label);
20158 /* Helper function for memset. For the QImode value 0xXY produce
20159 0xXYXYXYXY of the width specified by MODE. This is essentially
20160 a * 0x01010101, but we can do slightly better than
20161 synth_mult by unwinding the sequence by hand on CPUs with
20164 promote_duplicated_reg (enum machine_mode mode, rtx val)
20166 enum machine_mode valmode = GET_MODE (val);
20168 int nops = mode == DImode ? 3 : 2;
20170 gcc_assert (mode == SImode || mode == DImode);
20171 if (val == const0_rtx)
20172 return copy_to_mode_reg (mode, const0_rtx);
20173 if (CONST_INT_P (val))
20175 HOST_WIDE_INT v = INTVAL (val) & 255;
20179 if (mode == DImode)
20180 v |= (v << 16) << 16;
20181 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20184 if (valmode == VOIDmode)
20186 if (valmode != QImode)
20187 val = gen_lowpart (QImode, val);
20188 if (mode == QImode)
20190 if (!TARGET_PARTIAL_REG_STALL)
20192 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20193 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20194 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20195 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20197 rtx reg = convert_modes (mode, QImode, val, true);
20198 tmp = promote_duplicated_reg (mode, const1_rtx);
20199 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20204 rtx reg = convert_modes (mode, QImode, val, true);
20206 if (!TARGET_PARTIAL_REG_STALL)
20207 if (mode == SImode)
20208 emit_insn (gen_movsi_insv_1 (reg, reg));
20210 emit_insn (gen_movdi_insv_1 (reg, reg));
20213 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20214 NULL, 1, OPTAB_DIRECT);
20216 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20218 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20219 NULL, 1, OPTAB_DIRECT);
20220 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20221 if (mode == SImode)
20223 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20224 NULL, 1, OPTAB_DIRECT);
20225 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
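
/* The shift/or sequence above, sketched on ordinary integers:
   replicate the low byte 0xXY across the word -- the same result as
   multiplying by 0x01010101 (illustrative, not GCC code).  */
#include <stdint.h>

static uint32_t
dup_byte32_sketch (uint8_t v)
{
  uint32_t r = v;
  r |= r << 8;    /* 0x0000XYXY */
  r |= r << 16;   /* 0xXYXYXYXY */
  return r;
}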
20230 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that
20231 will be needed by the main loop storing SIZE_NEEDED chunks and by the prologue
20232 getting the alignment from ALIGN to DESIRED_ALIGN. */
20234 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20239 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20240 promoted_val = promote_duplicated_reg (DImode, val);
20241 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20242 promoted_val = promote_duplicated_reg (SImode, val);
20243 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20244 promoted_val = promote_duplicated_reg (HImode, val);
20246 promoted_val = val;
20248 return promoted_val;
20251 /* Expand string clear operation (bzero). Use i386 string operations when
20252 profitable. See expand_movmem comment for explanation of individual
20253 steps performed. */
20255 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20256 rtx expected_align_exp, rtx expected_size_exp)
20261 rtx jump_around_label = NULL;
20262 HOST_WIDE_INT align = 1;
20263 unsigned HOST_WIDE_INT count = 0;
20264 HOST_WIDE_INT expected_size = -1;
20265 int size_needed = 0, epilogue_size_needed;
20266 int desired_align = 0, align_bytes = 0;
20267 enum stringop_alg alg;
20268 rtx promoted_val = NULL;
20269 bool force_loopy_epilogue = false;
20271 bool need_zero_guard = false;
20273 if (CONST_INT_P (align_exp))
20274 align = INTVAL (align_exp);
20275 /* i386 can do misaligned access at a reasonably increased cost. */
20276 if (CONST_INT_P (expected_align_exp)
20277 && INTVAL (expected_align_exp) > align)
20278 align = INTVAL (expected_align_exp);
20279 if (CONST_INT_P (count_exp))
20280 count = expected_size = INTVAL (count_exp);
20281 if (CONST_INT_P (expected_size_exp) && count == 0)
20282 expected_size = INTVAL (expected_size_exp);
20284 /* Make sure we don't need to care about overflow later on. */
20285 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20288 /* Step 0: Decide on preferred algorithm, desired alignment and
20289 size of chunks to be copied by main loop. */
20291 alg = decide_alg (count, expected_size, true, &dynamic_check);
20292 desired_align = decide_alignment (align, alg, expected_size);
20294 if (!TARGET_ALIGN_STRINGOPS)
20295 align = desired_align;
20297 if (alg == libcall)
20299 gcc_assert (alg != no_stringop);
20301 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20302 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20307 gcc_unreachable ();
20309 need_zero_guard = true;
20310 size_needed = GET_MODE_SIZE (Pmode);
20312 case unrolled_loop:
20313 need_zero_guard = true;
20314 size_needed = GET_MODE_SIZE (Pmode) * 4;
20316 case rep_prefix_8_byte:
20319 case rep_prefix_4_byte:
20322 case rep_prefix_1_byte:
20326 need_zero_guard = true;
20330 epilogue_size_needed = size_needed;
20332 /* Step 1: Prologue guard. */
20334 /* Alignment code needs count to be in register. */
20335 if (CONST_INT_P (count_exp) && desired_align > align)
20337 if (INTVAL (count_exp) > desired_align
20338 && INTVAL (count_exp) > size_needed)
20341 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20342 if (align_bytes <= 0)
20345 align_bytes = desired_align - align_bytes;
20347 if (align_bytes == 0)
20349 enum machine_mode mode = SImode;
20350 if (TARGET_64BIT && (count & ~0xffffffff))
20352 count_exp = force_reg (mode, count_exp);
20355 /* Do the cheap promotion to allow better CSE across the
20356 main loop and epilogue (i.e. one load of the big constant in
20357 front of all code). */
20358 if (CONST_INT_P (val_exp))
20359 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20360 desired_align, align);
20361 /* Ensure that alignment prologue won't copy past end of block. */
20362 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20364 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20365 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20366 Make sure it is a power of 2. */
20367 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20369 /* To improve performance of small blocks, we jump around the VAL
20370 promoting code. This means that if the promoted VAL is not constant,
20371 we might not use it in the epilogue and have to use a byte
20373 if (epilogue_size_needed > 2 && !promoted_val)
20374 force_loopy_epilogue = true;
20377 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20379 /* If the main algorithm works on QImode, no epilogue is needed.
20380 For small sizes just don't align anything. */
20381 if (size_needed == 1)
20382 desired_align = align;
20389 label = gen_label_rtx ();
20390 emit_cmp_and_jump_insns (count_exp,
20391 GEN_INT (epilogue_size_needed),
20392 LTU, 0, counter_mode (count_exp), 1, label);
20393 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20394 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20396 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20399 if (dynamic_check != -1)
20401 rtx hot_label = gen_label_rtx ();
20402 jump_around_label = gen_label_rtx ();
20403 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20404 LEU, 0, counter_mode (count_exp), 1, hot_label);
20405 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20406 set_storage_via_libcall (dst, count_exp, val_exp, false);
20407 emit_jump (jump_around_label);
20408 emit_label (hot_label);
20411 /* Step 2: Alignment prologue. */
20413 /* Do the expensive promotion once we have branched off the small blocks. */
20415 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20416 desired_align, align);
20417 gcc_assert (desired_align >= 1 && align >= 1);
20419 if (desired_align > align)
20421 if (align_bytes == 0)
20423 /* Except for the first move in the epilogue, we no longer know
20424 the constant offset in aliasing info. It doesn't seem worth
20425 the pain to maintain it for the first move, so throw it away
20427 dst = change_address (dst, BLKmode, destreg);
20428 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20433 /* If we know how many bytes need to be stored before dst is
20434 sufficiently aligned, maintain aliasing info accurately. */
20435 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20436 desired_align, align_bytes);
20437 count_exp = plus_constant (count_exp, -align_bytes);
20438 count -= align_bytes;
20440 if (need_zero_guard
20441 && (count < (unsigned HOST_WIDE_INT) size_needed
20442 || (align_bytes == 0
20443 && count < ((unsigned HOST_WIDE_INT) size_needed
20444 + desired_align - align))))
20446 /* It is possible that we copied enough so the main loop will not
20448 gcc_assert (size_needed > 1);
20449 if (label == NULL_RTX)
20450 label = gen_label_rtx ();
20451 emit_cmp_and_jump_insns (count_exp,
20452 GEN_INT (size_needed),
20453 LTU, 0, counter_mode (count_exp), 1, label);
20454 if (expected_size == -1
20455 || expected_size < (desired_align - align) / 2 + size_needed)
20456 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20458 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20461 if (label && size_needed == 1)
20463 emit_label (label);
20464 LABEL_NUSES (label) = 1;
20466 promoted_val = val_exp;
20467 epilogue_size_needed = 1;
20469 else if (label == NULL_RTX)
20470 epilogue_size_needed = size_needed;
20472 /* Step 3: Main loop. */
20478 gcc_unreachable ();
20480 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20481 count_exp, QImode, 1, expected_size);
20484 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20485 count_exp, Pmode, 1, expected_size);
20487 case unrolled_loop:
20488 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20489 count_exp, Pmode, 4, expected_size);
20491 case rep_prefix_8_byte:
20492 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20495 case rep_prefix_4_byte:
20496 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20499 case rep_prefix_1_byte:
20500 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20504 /* Properly adjust the offset of the destination memory for aliasing. */
20505 if (CONST_INT_P (count_exp))
20506 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20507 (count / size_needed) * size_needed);
20509 dst = change_address (dst, BLKmode, destreg);
20511 /* Step 4: Epilogue to copy the remaining bytes. */
20515 /* When the main loop is done, COUNT_EXP might hold the original count,
20516 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20517 The epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20518 bytes. Compensate if needed. */
20520 if (size_needed < epilogue_size_needed)
20523 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20524 GEN_INT (size_needed - 1), count_exp, 1,
20526 if (tmp != count_exp)
20527 emit_move_insn (count_exp, tmp);
20529 emit_label (label);
20530 LABEL_NUSES (label) = 1;
20533 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20535 if (force_loopy_epilogue)
20536 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20537 epilogue_size_needed);
20539 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20540 epilogue_size_needed);
20542 if (jump_around_label)
20543 emit_label (jump_around_label);
20547 /* Expand the appropriate insns for doing strlen if not just doing
20550 out = result, initialized with the start address
20551 align_rtx = alignment of the address.
20552 scratch = scratch register, initialized with the start address when
20553 not aligned, otherwise undefined
20555 This is just the body. It needs the initializations mentioned above and
20556 some address computing at the end. These things are done in i386.md. */
20559 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20563 rtx align_2_label = NULL_RTX;
20564 rtx align_3_label = NULL_RTX;
20565 rtx align_4_label = gen_label_rtx ();
20566 rtx end_0_label = gen_label_rtx ();
20568 rtx tmpreg = gen_reg_rtx (SImode);
20569 rtx scratch = gen_reg_rtx (SImode);
20573 if (CONST_INT_P (align_rtx))
20574 align = INTVAL (align_rtx);
20576 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20578 /* Is there a known alignment and is it less than 4? */
20581 rtx scratch1 = gen_reg_rtx (Pmode);
20582 emit_move_insn (scratch1, out);
20583 /* Is there a known alignment and is it not 2? */
20586 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20587 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20589 /* Leave just the 3 lower bits. */
20590 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20591 NULL_RTX, 0, OPTAB_WIDEN);
20593 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20594 Pmode, 1, align_4_label);
20595 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20596 Pmode, 1, align_2_label);
20597 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20598 Pmode, 1, align_3_label);
20602 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20603 check if it is aligned to a 4-byte boundary. */
20605 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20606 NULL_RTX, 0, OPTAB_WIDEN);
20608 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20609 Pmode, 1, align_4_label);
20612 mem = change_address (src, QImode, out);
20614 /* Now compare the bytes. */
20616 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
20617 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20618 QImode, 1, end_0_label);
20620 /* Increment the address. */
20621 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20623 /* Not needed with an alignment of 2 */
20626 emit_label (align_2_label);
20628 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20631 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20633 emit_label (align_3_label);
20636 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20639 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20642 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
20643 align this loop; it only enlarges the program and does not help to
20645 emit_label (align_4_label);
20647 mem = change_address (src, SImode, out);
20648 emit_move_insn (scratch, mem);
20649 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20651 /* This formula yields a nonzero result iff one of the bytes is zero.
20652 This saves three branches inside the loop and many cycles. */
20654 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20655 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20656 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20657 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20658 gen_int_mode (0x80808080, SImode)));
20659 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20664 rtx reg = gen_reg_rtx (SImode);
20665 rtx reg2 = gen_reg_rtx (Pmode);
20666 emit_move_insn (reg, tmpreg);
20667 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20669 /* If zero is not in the first two bytes, move two bytes forward. */
20670 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20671 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20672 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20673 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20674 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20677 /* Emit lea manually to avoid clobbering of flags. */
20678 emit_insn (gen_rtx_SET (SImode, reg2,
20679 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20681 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20682 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20683 emit_insn (gen_rtx_SET (VOIDmode, out,
20684 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20690 rtx end_2_label = gen_label_rtx ();
20691 /* Is zero in the first two bytes? */
20693 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20694 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20695 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20696 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20697 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20699 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20700 JUMP_LABEL (tmp) = end_2_label;
20702 /* Not in the first two. Move two bytes forward. */
20703 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20704 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20706 emit_label (end_2_label);
20710 /* Avoid branch in fixing the byte. */
20711 tmpreg = gen_lowpart (QImode, tmpreg);
20712 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20713 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20714 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20715 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20717 emit_label (end_0_label);
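
/* The branch-saving formula emitted above, on plain integers
   (illustrative sketch): nonzero iff some byte of V is zero.  The
   addition of -0x01010101 in the RTL is the subtraction below.  */
#include <stdint.h>

static int
has_zero_byte_sketch (uint32_t v)
{
  return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
}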
20720 /* Expand strlen. */
20723 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20725 rtx addr, scratch1, scratch2, scratch3, scratch4;
20727 /* The generic case of the strlen expander is long. Avoid expanding it
20728 unless TARGET_INLINE_ALL_STRINGOPS. */
20730 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20731 && !TARGET_INLINE_ALL_STRINGOPS
20732 && !optimize_insn_for_size_p ()
20733 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20736 addr = force_reg (Pmode, XEXP (src, 0));
20737 scratch1 = gen_reg_rtx (Pmode);
20739 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20740 && !optimize_insn_for_size_p ())
20742 /* Well, it seems that some optimizer does not combine a call like
20743 foo(strlen(bar), strlen(bar));
20744 when the move and the subtraction are done here. It does calculate
20745 the length just once when these instructions are done inside of
20746 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20747 often used and I use one fewer register for the lifetime of
20748 output_strlen_unroll() this is better. */
20750 emit_move_insn (out, addr);
20752 ix86_expand_strlensi_unroll_1 (out, src, align);
20754 /* strlensi_unroll_1 returns the address of the zero at the end of
20755 the string, like memchr(), so compute the length by subtracting
20756 the start address. */
20757 emit_insn (ix86_gen_sub3 (out, out, addr));
20763 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20764 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20767 scratch2 = gen_reg_rtx (Pmode);
20768 scratch3 = gen_reg_rtx (Pmode);
20769 scratch4 = force_reg (Pmode, constm1_rtx);
20771 emit_move_insn (scratch3, addr);
20772 eoschar = force_reg (QImode, eoschar);
20774 src = replace_equiv_address_nv (src, scratch3);
20776 /* If .md starts supporting :P, this can be done in .md. */
20777 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20778 scratch4), UNSPEC_SCAS);
20779 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20780 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20781 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
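
/* Arithmetic behind the final one's-complement and add above, sketched
   on plain integers: rep scasb starts the count register at -1 and
   decrements it once per byte scanned, including the terminating zero,
   so afterwards it holds -(len + 2), and ~counter - 1 == len.  */
static long
scas_len_sketch (long counter_after_scas)
{
  return ~counter_after_scas - 1;   /* e.g. -7 -> 5 for "hello" */
}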
20786 /* For a given symbol (function), construct code to compute the address of its
20787 PLT entry in the large x86-64 PIC model. */
20789 construct_plt_address (rtx symbol)
20791 rtx tmp = gen_reg_rtx (Pmode);
20792 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20794 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20795 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20797 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20798 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20803 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20805 rtx pop, int sibcall)
20807 rtx use = NULL, call;
20809 if (pop == const0_rtx)
20811 gcc_assert (!TARGET_64BIT || !pop);
20813 if (TARGET_MACHO && !TARGET_64BIT)
20816 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20817 fnaddr = machopic_indirect_call_target (fnaddr);
20822 /* Static functions and indirect calls don't need the pic register. */
20823 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20824 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20825 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20826 use_reg (&use, pic_offset_table_rtx);
20829 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20831 rtx al = gen_rtx_REG (QImode, AX_REG);
20832 emit_move_insn (al, callarg2);
20833 use_reg (&use, al);
20836 if (ix86_cmodel == CM_LARGE_PIC
20838 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20839 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20840 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20842 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20843 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20845 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20846 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20849 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20851 call = gen_rtx_SET (VOIDmode, retval, call);
20854 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20855 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20856 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20859 && ix86_cfun_abi () == MS_ABI
20860 && (!callarg2 || INTVAL (callarg2) != -2))
20862 /* We need to represent that SI and DI registers are clobbered
20864 static int clobbered_registers[] = {
20865 XMM6_REG, XMM7_REG, XMM8_REG,
20866 XMM9_REG, XMM10_REG, XMM11_REG,
20867 XMM12_REG, XMM13_REG, XMM14_REG,
20868 XMM15_REG, SI_REG, DI_REG
20871 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20872 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20873 UNSPEC_MS_TO_SYSV_CALL);
20877 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20878 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20881 (SSE_REGNO_P (clobbered_registers[i])
20883 clobbered_registers[i]));
20885 call = gen_rtx_PARALLEL (VOIDmode,
20886 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20890 call = emit_call_insn (call);
20892 CALL_INSN_FUNCTION_USAGE (call) = use;
20898 /* Clear stack slot assignments remembered from previous functions.
20899 This is called from INIT_EXPANDERS once before RTL is emitted for each
20902 static struct machine_function *
20903 ix86_init_machine_status (void)
20905 struct machine_function *f;
20907 f = ggc_alloc_cleared_machine_function ();
20908 f->use_fast_prologue_epilogue_nregs = -1;
20909 f->tls_descriptor_call_expanded_p = 0;
20910 f->call_abi = ix86_abi;
20915 /* Return a MEM corresponding to a stack slot with mode MODE.
20916 Allocate a new slot if necessary.
20918 The RTL for a function can have several slots available: N is
20919 which slot to use. */
20922 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20924 struct stack_local_entry *s;
20926 gcc_assert (n < MAX_386_STACK_LOCALS);
20928 /* Virtual slot is valid only before vregs are instantiated. */
20929 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20931 for (s = ix86_stack_locals; s; s = s->next)
20932 if (s->mode == mode && s->n == n)
20933 return copy_rtx (s->rtl);
20935 s = ggc_alloc_stack_local_entry ();
20938 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20940 s->next = ix86_stack_locals;
20941 ix86_stack_locals = s;
20945 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20947 static GTY(()) rtx ix86_tls_symbol;
20949 ix86_tls_get_addr (void)
20952 if (!ix86_tls_symbol)
20954 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20955 (TARGET_ANY_GNU_TLS
20957 ? "___tls_get_addr"
20958 : "__tls_get_addr");
20961 return ix86_tls_symbol;
20964 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20966 static GTY(()) rtx ix86_tls_module_base_symbol;
20968 ix86_tls_module_base (void)
20971 if (!ix86_tls_module_base_symbol)
20973 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20974 "_TLS_MODULE_BASE_");
20975 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20976 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20979 return ix86_tls_module_base_symbol;
20982 /* Calculate the length of the memory address in the instruction
20983 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20986 memory_address_length (rtx addr)
20988 struct ix86_address parts;
20989 rtx base, index, disp;
20993 if (GET_CODE (addr) == PRE_DEC
20994 || GET_CODE (addr) == POST_INC
20995 || GET_CODE (addr) == PRE_MODIFY
20996 || GET_CODE (addr) == POST_MODIFY)
20999 ok = ix86_decompose_address (addr, &parts);
21002 if (parts.base && GET_CODE (parts.base) == SUBREG)
21003 parts.base = SUBREG_REG (parts.base);
21004 if (parts.index && GET_CODE (parts.index) == SUBREG)
21005 parts.index = SUBREG_REG (parts.index);
21008 index = parts.index;
21013 - esp as the base always wants an index,
21014 - ebp as the base always wants a displacement,
21015 - r12 as the base always wants an index,
21016 - r13 as the base always wants a displacement. */
21018 /* Register Indirect. */
21019 if (base && !index && !disp)
21021 /* esp (for its index) and ebp (for its displacement) need
21022 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21025 && (addr == arg_pointer_rtx
21026 || addr == frame_pointer_rtx
21027 || REGNO (addr) == SP_REG
21028 || REGNO (addr) == BP_REG
21029 || REGNO (addr) == R12_REG
21030 || REGNO (addr) == R13_REG))
21034 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21035 is not disp32, but disp32(%rip), so for disp32
21036 a SIB byte is needed, unless print_operand_address
21037 optimizes it into disp32(%rip) or (%rip) is implied
21039 else if (disp && !base && !index)
21046 if (GET_CODE (disp) == CONST)
21047 symbol = XEXP (disp, 0);
21048 if (GET_CODE (symbol) == PLUS
21049 && CONST_INT_P (XEXP (symbol, 1)))
21050 symbol = XEXP (symbol, 0);
21052 if (GET_CODE (symbol) != LABEL_REF
21053 && (GET_CODE (symbol) != SYMBOL_REF
21054 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21055 && (GET_CODE (symbol) != UNSPEC
21056 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21057 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21064 /* Find the length of the displacement constant. */
21067 if (base && satisfies_constraint_K (disp))
21072 /* ebp always wants a displacement. Similarly r13. */
21073 else if (base && REG_P (base)
21074 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21077 /* An index requires the two-byte modrm form.... */
21079 /* ...like esp (or r12), which always wants an index. */
21080 || base == arg_pointer_rtx
21081 || base == frame_pointer_rtx
21082 || (base && REG_P (base)
21083 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
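
/* Rule of thumb the code above encodes, as a sketch over plain flags
   (illustrative; the register numbers 4 and 5 stand for esp/r12 and
   ebp/r13 respectively and are not the GCC macros): the number of
   SIB + displacement bytes needed beyond the modrm byte.  */
static int
addr_extra_bytes_sketch (int base_regno, int has_base, int has_index,
                         int has_disp, int disp_fits_8bit)
{
  int len = 0;

  /* An index always forces the SIB form; so does esp/r12 as a base.  */
  if (has_index || (has_base && base_regno == 4))
    len += 1;

  /* ebp/r13 as a base cannot be encoded without a displacement.  */
  if (!has_disp && has_base && base_regno == 5)
    { has_disp = 1; disp_fits_8bit = 1; }

  if (has_disp)
    len += disp_fits_8bit ? 1 : 4;

  return len;
}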
21100 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
21101 is set, expect that the insn has an 8-bit immediate alternative. */
21103 ix86_attr_length_immediate_default (rtx insn, int shortform)
21107 extract_insn_cached (insn);
21108 for (i = recog_data.n_operands - 1; i >= 0; --i)
21109 if (CONSTANT_P (recog_data.operand[i]))
21111 enum attr_mode mode = get_attr_mode (insn);
21114 if (shortform && CONST_INT_P (recog_data.operand[i]))
21116 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21123 ival = trunc_int_for_mode (ival, HImode);
21126 ival = trunc_int_for_mode (ival, SImode);
21131 if (IN_RANGE (ival, -128, 127))
21148 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21153 fatal_insn ("unknown insn mode", insn);
21158 /* Compute default value for "length_address" attribute. */
21160 ix86_attr_length_address_default (rtx insn)
21164 if (get_attr_type (insn) == TYPE_LEA)
21166 rtx set = PATTERN (insn), addr;
21168 if (GET_CODE (set) == PARALLEL)
21169 set = XVECEXP (set, 0, 0);
21171 gcc_assert (GET_CODE (set) == SET);
21173 addr = SET_SRC (set);
21174 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21176 if (GET_CODE (addr) == ZERO_EXTEND)
21177 addr = XEXP (addr, 0);
21178 if (GET_CODE (addr) == SUBREG)
21179 addr = SUBREG_REG (addr);
21182 return memory_address_length (addr);
21185 extract_insn_cached (insn);
21186 for (i = recog_data.n_operands - 1; i >= 0; --i)
21187 if (MEM_P (recog_data.operand[i]))
21189 constrain_operands_cached (reload_completed);
21190 if (which_alternative != -1)
21192 const char *constraints = recog_data.constraints[i];
21193 int alt = which_alternative;
21195 while (*constraints == '=' || *constraints == '+')
21198 while (*constraints++ != ',')
21200 /* Skip ignored operands. */
21201 if (*constraints == 'X')
21204 return memory_address_length (XEXP (recog_data.operand[i], 0));
21209 /* Compute the default value for the "length_vex" attribute. It includes the
21210 2- or 3-byte VEX prefix and 1 opcode byte. */
21213 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21218 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit requires
21219 the 3-byte VEX prefix. */
21220 if (!has_0f_opcode || has_vex_w)
21223 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21227 extract_insn_cached (insn);
21229 for (i = recog_data.n_operands - 1; i >= 0; --i)
21230 if (REG_P (recog_data.operand[i]))
21232 /* The REX.W bit requires the 3-byte VEX prefix. */
21233 if (GET_MODE (recog_data.operand[i]) == DImode
21234 && GENERAL_REG_P (recog_data.operand[i]))
21239 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
21240 if (MEM_P (recog_data.operand[i])
21241 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21248 /* Return the maximum number of instructions a cpu can issue. */
21251 ix86_issue_rate (void)
21255 case PROCESSOR_PENTIUM:
21256 case PROCESSOR_ATOM:
21260 case PROCESSOR_PENTIUMPRO:
21261 case PROCESSOR_PENTIUM4:
21262 case PROCESSOR_ATHLON:
21264 case PROCESSOR_AMDFAM10:
21265 case PROCESSOR_NOCONA:
21266 case PROCESSOR_GENERIC32:
21267 case PROCESSOR_GENERIC64:
21268 case PROCESSOR_BDVER1:
21271 case PROCESSOR_CORE2:
21279 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21280 by DEP_INSN and nothing else set by DEP_INSN. */
21283 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21287 /* Simplify the test for uninteresting insns. */
21288 if (insn_type != TYPE_SETCC
21289 && insn_type != TYPE_ICMOV
21290 && insn_type != TYPE_FCMOV
21291 && insn_type != TYPE_IBR)
21294 if ((set = single_set (dep_insn)) != 0)
21296 set = SET_DEST (set);
21299 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21300 && XVECLEN (PATTERN (dep_insn), 0) == 2
21301 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21302 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21304 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21305 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21310 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21313 /* This test is true if the dependent insn reads the flags but
21314 not any other potentially set register. */
21315 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21318 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21324 /* Return true iff USE_INSN has a memory address with operands set by
21328 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21331 extract_insn_cached (use_insn);
21332 for (i = recog_data.n_operands - 1; i >= 0; --i)
21333 if (MEM_P (recog_data.operand[i]))
21335 rtx addr = XEXP (recog_data.operand[i], 0);
21336 return modified_in_p (addr, set_insn) != 0;
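
/* Concrete example of the AGI checked above, on Pentium:

     addl %ebx, %esi
     movl (%esi), %eax

   The load's address depends on the just-modified %esi, so the
   in-order pipeline pays an extra cycle of latency.  */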
21342 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21344 enum attr_type insn_type, dep_insn_type;
21345 enum attr_memory memory;
21347 int dep_insn_code_number;
21349 /* Anti and output dependencies have zero cost on all CPUs. */
21350 if (REG_NOTE_KIND (link) != 0)
21353 dep_insn_code_number = recog_memoized (dep_insn);
21355 /* If we can't recognize the insns, we can't really do anything. */
21356 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21359 insn_type = get_attr_type (insn);
21360 dep_insn_type = get_attr_type (dep_insn);
21364 case PROCESSOR_PENTIUM:
21365 /* Address Generation Interlock adds a cycle of latency. */
21366 if (insn_type == TYPE_LEA)
21368 rtx addr = PATTERN (insn);
21370 if (GET_CODE (addr) == PARALLEL)
21371 addr = XVECEXP (addr, 0, 0);
21373 gcc_assert (GET_CODE (addr) == SET);
21375 addr = SET_SRC (addr);
21376 if (modified_in_p (addr, dep_insn))
21379 else if (ix86_agi_dependent (dep_insn, insn))
21382 /* ??? Compares pair with jump/setcc. */
21383 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21386 /* Floating point stores require the value to be ready one cycle earlier. */
21387 if (insn_type == TYPE_FMOV
21388 && get_attr_memory (insn) == MEMORY_STORE
21389 && !ix86_agi_dependent (dep_insn, insn))
21393 case PROCESSOR_PENTIUMPRO:
21394 memory = get_attr_memory (insn);
21396 /* INT->FP conversion is expensive. */
21397 if (get_attr_fp_int_src (dep_insn))
21400 /* There is one extra cycle of latency between an FP op and a store. */
21401 if (insn_type == TYPE_FMOV
21402 && (set = single_set (dep_insn)) != NULL_RTX
21403 && (set2 = single_set (insn)) != NULL_RTX
21404 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21405 && MEM_P (SET_DEST (set2)))
21408 /* Show the ability of the reorder buffer to hide the latency of a load by
21409 executing it in parallel with the previous instruction in case the
21410 previous instruction is not needed to compute the address. */
21411 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21412 && !ix86_agi_dependent (dep_insn, insn))
21414 /* Claim that moves take one cycle, as the core can issue one load
21415 at a time and the next load can start a cycle later. */
21416 if (dep_insn_type == TYPE_IMOV
21417 || dep_insn_type == TYPE_FMOV)
21425 memory = get_attr_memory (insn);
21427 /* The esp dependency is resolved before the instruction is really
21429 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21430 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21433 /* INT->FP conversion is expensive. */
21434 if (get_attr_fp_int_src (dep_insn))
21437 /* Show the ability of the reorder buffer to hide the latency of a load by
21438 executing it in parallel with the previous instruction in case the
21439 previous instruction is not needed to compute the address. */
21440 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21441 && !ix86_agi_dependent (dep_insn, insn))
21443 /* Claim that moves take one cycle, as the core can issue one load
21444 at a time and the next load can start a cycle later. */
21445 if (dep_insn_type == TYPE_IMOV
21446 || dep_insn_type == TYPE_FMOV)
21455 case PROCESSOR_ATHLON:
21457 case PROCESSOR_AMDFAM10:
21458 case PROCESSOR_BDVER1:
21459 case PROCESSOR_ATOM:
21460 case PROCESSOR_GENERIC32:
21461 case PROCESSOR_GENERIC64:
21462 memory = get_attr_memory (insn);
21464 /* Show the ability of the reorder buffer to hide the latency of a load by
21465 executing it in parallel with the previous instruction in case the
21466 previous instruction is not needed to compute the address. */
21467 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21468 && !ix86_agi_dependent (dep_insn, insn))
21470 enum attr_unit unit = get_attr_unit (insn);
21473 /* Because of the difference between the lengths of the integer and
21474 floating unit pipeline preparation stages, the memory operands
21475 for floating point are cheaper.
21477 ??? For Athlon the difference is most probably 2. */
21478 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21481 loadcost = TARGET_ATHLON ? 2 : 0;
21483 if (cost >= loadcost)
21496 /* How many alternative schedules to try. This should be as wide as the
21497 scheduling freedom in the DFA, but no wider. Making this value too
21498 large results in extra work for the scheduler. */
21501 ia32_multipass_dfa_lookahead (void)
21505 case PROCESSOR_PENTIUM:
21508 case PROCESSOR_PENTIUMPRO:
21518 /* Compute the alignment given to a constant that is being placed in memory.
21519 EXP is the constant and ALIGN is the alignment that the object would
21521 The value of this function is used instead of that alignment to align
21525 ix86_constant_alignment (tree exp, int align)
21527 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21528 || TREE_CODE (exp) == INTEGER_CST)
21530 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21532 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21535 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21536 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21537 return BITS_PER_WORD;
21542 /* Compute the alignment for a static variable.
21543 TYPE is the data type, and ALIGN is the alignment that
21544 the object would ordinarily have. The value of this function is used
21545 instead of that alignment to align the object. */
21548 ix86_data_alignment (tree type, int align)
21550 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21552 if (AGGREGATE_TYPE_P (type)
21553 && TYPE_SIZE (type)
21554 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21555 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21556 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21557 && align < max_align)
21560 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
21561 to a 16-byte boundary. */
21564 if (AGGREGATE_TYPE_P (type)
21565 && TYPE_SIZE (type)
21566 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21567 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21568 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21572 if (TREE_CODE (type) == ARRAY_TYPE)
21574 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21576 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21579 else if (TREE_CODE (type) == COMPLEX_TYPE)
21582 if (TYPE_MODE (type) == DCmode && align < 64)
21584 if ((TYPE_MODE (type) == XCmode
21585 || TYPE_MODE (type) == TCmode) && align < 128)
21588 else if ((TREE_CODE (type) == RECORD_TYPE
21589 || TREE_CODE (type) == UNION_TYPE
21590 || TREE_CODE (type) == QUAL_UNION_TYPE)
21591 && TYPE_FIELDS (type))
21593 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21595 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21598 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21599 || TREE_CODE (type) == INTEGER_TYPE)
21601 if (TYPE_MODE (type) == DFmode && align < 64)
21603 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
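
/* Worked example of the rules above (illustrative): on x86-64 a static
   "char buf[20]" is at least 16 bytes long, so it is given 128-bit
   alignment even though its element alignment is only 1, while
   "char buf[8]" keeps its natural alignment.  */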
21610 /* Compute the alignment for a local variable or a stack slot. EXP is
21611 the data type or decl itself, MODE is the widest mode available and
21612 ALIGN is the alignment that the object would ordinarily have. The
21613 value of this macro is used instead of that alignment to align the
21617 ix86_local_alignment (tree exp, enum machine_mode mode,
21618 unsigned int align)
21622 if (exp && DECL_P (exp))
21624 type = TREE_TYPE (exp);
21633 /* Don't do dynamic stack realignment for long long objects with
21634 -mpreferred-stack-boundary=2. */
21637 && ix86_preferred_stack_boundary < 64
21638 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21639 && (!type || !TYPE_USER_ALIGN (type))
21640 && (!decl || !DECL_USER_ALIGN (decl)))
21643 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21644 register in MODE. We will return the largest alignment of XF
21648 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21649 align = GET_MODE_ALIGNMENT (DFmode);
21653 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
21654 to a 16-byte boundary. The exact wording is:
21656 An array uses the same alignment as its elements, except that a local or
21657 global array variable of length at least 16 bytes or
21658 a C99 variable-length array variable always has alignment of at least 16 bytes.
21660 This was added to allow use of aligned SSE instructions on arrays. This
21661 rule is meant for static storage (where the compiler cannot do the analysis
21662 by itself). We follow it for automatic variables only when convenient.
21663 We fully control everything in the function being compiled, and functions from
21664 other units cannot rely on the alignment.
21666 Exclude the va_list type. It is the common case of a local array where
21667 we cannot benefit from the alignment. */
21668 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21671 if (AGGREGATE_TYPE_P (type)
21672 && (TYPE_MAIN_VARIANT (type)
21673 != TYPE_MAIN_VARIANT (va_list_type_node))
21674 && TYPE_SIZE (type)
21675 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21676 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21677 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21680 if (TREE_CODE (type) == ARRAY_TYPE)
21682 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21684 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21687 else if (TREE_CODE (type) == COMPLEX_TYPE)
21689 if (TYPE_MODE (type) == DCmode && align < 64)
21691 if ((TYPE_MODE (type) == XCmode
21692 || TYPE_MODE (type) == TCmode) && align < 128)
21695 else if ((TREE_CODE (type) == RECORD_TYPE
21696 || TREE_CODE (type) == UNION_TYPE
21697 || TREE_CODE (type) == QUAL_UNION_TYPE)
21698 && TYPE_FIELDS (type))
21700 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21702 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21705 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21706 || TREE_CODE (type) == INTEGER_TYPE)
21709 if (TYPE_MODE (type) == DFmode && align < 64)
21711 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21717 /* Compute the minimum required alignment for dynamic stack realignment
21718 purposes for a local variable, parameter or a stack slot. EXP is
21719 the data type or decl itself, MODE is its mode and ALIGN is the
21720 alignment that the object would ordinarily have. */
21723 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21724 unsigned int align)
21728 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21731 if (exp && DECL_P (exp))
21733 type = TREE_TYPE (exp);
21742 /* Don't do dynamic stack realignment for long long objects with
21743 -mpreferred-stack-boundary=2. */
21744 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21745 && (!type || !TYPE_USER_ALIGN (type))
21746 && (!decl || !DECL_USER_ALIGN (decl)))
21752 /* Find a location for the static chain incoming to a nested function.
21753 This is a register, unless all free registers are used by arguments. */
21756 ix86_static_chain (const_tree fndecl, bool incoming_p)
21760 if (!DECL_STATIC_CHAIN (fndecl))
21765 /* We always use R10 in 64-bit mode. */
21771 /* By default in 32-bit mode we use ECX to pass the static chain. */
21774 fntype = TREE_TYPE (fndecl);
21775 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21777 /* Fastcall functions use ecx/edx for arguments, which leaves
21778 us with EAX for the static chain. */
21781 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21783 /* Thiscall functions use ecx for arguments, which leaves
21784 us with EAX for the static chain. */
21787 else if (ix86_function_regparm (fntype, fndecl) == 3)
21789 /* For regparm 3, we have no free call-clobbered registers in
21790 which to store the static chain. In order to implement this,
21791 we have the trampoline push the static chain to the stack.
21792 However, we can't push a value below the return address when
21793 we call the nested function directly, so we have to use an
21794 alternate entry point. For this we use ESI, and have the
21795 alternate entry point push ESI, so that things appear the
21796 same once we're executing the nested function. */
21799 if (fndecl == current_function_decl)
21800 ix86_static_chain_on_stack = true;
21801 return gen_frame_mem (SImode,
21802 plus_constant (arg_pointer_rtx, -8));
21808 return gen_rtx_REG (Pmode, regno);
21811 /* Emit RTL insns to initialize the variable parts of a trampoline.
21812 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21813 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21814 to be passed to the target function. */
21817 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21821 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21828 /* Depending on the static chain location, either load a register
21829 with a constant, or push the constant to the stack. All of the
21830 instructions are the same size. */
21831 chain = ix86_static_chain (fndecl, true);
21834 if (REGNO (chain) == CX_REG)
21836 else if (REGNO (chain) == AX_REG)
21839 gcc_unreachable ();
21844 mem = adjust_address (m_tramp, QImode, 0);
21845 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21847 mem = adjust_address (m_tramp, SImode, 1);
21848 emit_move_insn (mem, chain_value);
21850 /* Compute offset from the end of the jmp to the target function.
21851 In the case in which the trampoline stores the static chain on
21852 the stack, we need to skip the first insn which pushes the
21853 (call-saved) register static chain; this push is 1 byte. */
21854 disp = expand_binop (SImode, sub_optab, fnaddr,
21855 plus_constant (XEXP (m_tramp, 0),
21856 MEM_P (chain) ? 9 : 10),
21857 NULL_RTX, 1, OPTAB_DIRECT);
21859 mem = adjust_address (m_tramp, QImode, 5);
21860 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21862 mem = adjust_address (m_tramp, SImode, 6);
21863 emit_move_insn (mem, disp);
21869 /* Load the function address into r11. Try to load the address using
21870 the shorter movl instead of movabs. We may want to support
21871 movq for kernel mode, but the kernel does not use trampolines at
21873 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21875 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21877 mem = adjust_address (m_tramp, HImode, offset);
21878 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21880 mem = adjust_address (m_tramp, SImode, offset + 2);
21881 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21886 mem = adjust_address (m_tramp, HImode, offset);
21887 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21889 mem = adjust_address (m_tramp, DImode, offset + 2);
21890 emit_move_insn (mem, fnaddr);
21894 /* Load static chain using movabs to r10. */
21895 mem = adjust_address (m_tramp, HImode, offset);
21896 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21898 mem = adjust_address (m_tramp, DImode, offset + 2);
21899 emit_move_insn (mem, chain_value);
21902 /* Jump to r11; the last (unused) byte is a nop, only there to
21903 pad the write out to a single 32-bit store. */
21904 mem = adjust_address (m_tramp, SImode, offset);
21905 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21908 gcc_assert (offset <= TRAMPOLINE_SIZE);
21911 #ifdef ENABLE_EXECUTE_STACK
21912 #ifdef CHECK_EXECUTE_STACK_ENABLED
21913 if (CHECK_EXECUTE_STACK_ENABLED)
21915 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21916 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
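
/* For reference, the 32-bit trampoline laid out above when the static
   chain is passed in %ecx (offsets follow the adjust_address calls;
   opcode 0xb8/%eax for fastcall and thiscall):

      0:  b9 XX XX XX XX    movl $chain_value, %ecx
      5:  e9 XX XX XX XX    jmp  <target>        ; rel32 from offset 10

   In the regparm-3 case the first instruction is instead
   "68 XX XX XX XX" (pushl $chain_value), and the rel32 is computed from
   offset 9 so the jump lands one byte past <target>, skipping the
   1-byte "push %esi" at the function's alternate entry point.  */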
21920 /* The following file contains several enumerations and data structures
21921 built from the definitions in i386-builtin-types.def. */
21923 #include "i386-builtin-types.inc"
21925 /* Table for the ix86 builtin non-function types. */
21926 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21928 /* Retrieve an element from the above table, building some of
21929 the types lazily. */
21932 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21934 unsigned int index;
21937 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21939 type = ix86_builtin_type_tab[(int) tcode];
21943 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21944 if (tcode <= IX86_BT_LAST_VECT)
21946 enum machine_mode mode;
21948 index = tcode - IX86_BT_LAST_PRIM - 1;
21949 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21950 mode = ix86_builtin_type_vect_mode[index];
21952 type = build_vector_type_for_mode (itype, mode);
21958 index = tcode - IX86_BT_LAST_VECT - 1;
21959 if (tcode <= IX86_BT_LAST_PTR)
21960 quals = TYPE_UNQUALIFIED;
21962 quals = TYPE_QUAL_CONST;
21964 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21965 if (quals != TYPE_UNQUALIFIED)
21966 itype = build_qualified_type (itype, quals);
21968 type = build_pointer_type (itype);
21971 ix86_builtin_type_tab[(int) tcode] = type;

/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
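
/* Illustrative sketch (not part of the build): a function type code
   such as V4SF_FTYPE_V4SF_V4SF, as used by the bdesc_* tables below,
   is built here as the tree equivalent of

     v4sf f (v4sf, v4sf);

   the return type comes from the first ix86_builtin_func_args slot and
   the argument types are consed, last to first, onto void_list_node.  */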

/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
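
/* Illustrative use (a sketch; the actual calls are made from the
   ix86_init_*_builtins routines when walking the bdesc_* tables below):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);  */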

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
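
/* Illustrative trigger (a sketch, not code from this file): a function
   that switches ISA, e.g.

     __attribute__((target ("avx"))) void f (void);

   causes the target-attribute handling to call ix86_add_new_builtins
   with the updated isa_flags, at which point any matching deferred
   builtins are materialized in the extern scope.  */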

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
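
/* For example, SSE provides no direct cmpgtps encoding; in bdesc_args
   below, __builtin_ia32_cmpgtps is therefore listed with comparison
   code LT and a ..._SWAP type flag so that a > b is expanded as b < a.
   BUILTIN_DESC_SWAP_OPERANDS records the same kind of operand swap in
   the flag field.  */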

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
23268 /* Builtins with variable number of arguments. */
23269 static const struct builtin_description bdesc_args[] =
23271 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23272 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23273 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23274 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23275 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23276 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23277 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23287 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
23316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
23318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
23320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
23324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23344 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23345 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23346 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23347 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23349 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23350 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23351 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23352 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23353 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23354 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23355 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23356 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23357 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23358 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23359 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23360 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23361 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23362 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23363 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23366 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23367 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23368 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23369 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23370 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23371 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
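
  /* Note that the "greater than" and "greater or equal" compares below
     are implemented by swapping the operands of the LT/LE patterns (the
     _SWAP suffix on the function type), and the negated compares use the
     unordered codes UNGE/UNGT.  */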
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
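
  /* SSE2 */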
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
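
  /* In the shift builtins below, the _COUNT suffix marks variants whose
     last operand is a shift count rather than an ordinary vector operand,
     and _INT_CONVERT marks an operand that must first be converted to the
     mode the insn pattern expects (the whole-register V1TI shifts take
     V2DI arguments).  */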
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
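
  /* SSE2 MMX */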
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
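
  /* SSE3 */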
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
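
  /* SSSE3 */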
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
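
  /* SSE4.1 */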
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
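
  /* The three ptest builtins share a single insn pattern; the comparison
     code selects which flag the intrinsic tests: EQ for ptestz (ZF),
     LTU for ptestc (CF) and GTU for ptestnzc (neither flag set).  */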
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
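
  /* SSE4.2 */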
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
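
  /* SSE4A */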
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
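
  /* AES */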
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
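
  /* PCLMUL */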
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
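
  /* AVX */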
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
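/* Illustrative note (an editorial sketch, not part of the original table):
   the comparison code in the entries above selects which flag of the
   underlying VTESTP[SD]/PTEST instruction the builtin returns: EQ reads ZF
   (the "testz" forms), LTU reads CF ("testc"), and GTU tests that both are
   clear ("testnzc").  From user code this is reached via the intrinsic
   wrappers, e.g.

     #include <immintrin.h>
     int all_masked_signs_zero (__m256d a, __m256d b)
     {
       return _mm256_testz_pd (a, b);   // uses __builtin_ia32_vtestzpd256
     }
*/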
23888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
23889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
23891 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23892 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23894 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
23897 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
23898 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
23899 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
23900 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
23903 /* FMA4 and XOP. */
23904 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
23905 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
23906 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
23907 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
23908 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
23909 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
23910 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
23911 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
23912 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
23913 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
23914 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
23915 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
23916 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
23917 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
23918 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
23919 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
23920 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
23921 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
23922 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
23923 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
23924 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
23925 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
23926 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
23927 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
23928 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
23929 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
23930 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
23931 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
23932 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
23933 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
23934 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
23935 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
23936 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
23937 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
23938 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
23939 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
23940 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
23941 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
23942 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
23943 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
23944 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
23945 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
23946 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
23947 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
23948 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
23949 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
23950 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
23951 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
23952 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
23953 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
23954 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
23955 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
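/* Editorial note: each MULTI_ARG_* macro merely renames an
   ix86_builtin_func_type and so fixes the C-level prototype of the builtin
   it is attached to.  For example, MULTI_ARG_3_SF expands to
   V4SF_FTYPE_V4SF_V4SF_V4SF, so a builtin registered with it behaves as if
   declared as

     __v4sf __builtin_ia32_vfmaddps (__v4sf, __v4sf, __v4sf);
*/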
23957 static const struct builtin_description bdesc_multi_arg[] =
23959 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23960 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23961 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23962 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23963 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23964 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23965 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23966 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23968 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23969 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23970 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23971 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23972 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23973 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23974 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23975 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23977 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23978 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23979 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23980 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23982 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23983 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23984 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23985 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23987 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23988 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23989 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23990 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23992 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23993 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23994 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23995 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
23998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
23999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
24000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
24001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
24002 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
24003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
24005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
24006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
24007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
24008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
24009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
24010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
24011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
24013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
24015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
24016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
24017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
24020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
24021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
24030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
24031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
24032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
24033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
24034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
24035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
24036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
24038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
24039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
24040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
24042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
24043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
24045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
24046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
24047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
24048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
24049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
24050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
24052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
24069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
24072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
24073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
24074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
24076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
24077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
24080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
24081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
24082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
24084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
24085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
24088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
24089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
24090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
24092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
24096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
24097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
24098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
24100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
24101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
24104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
24105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
24106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
24108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
24109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
24112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
24113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
24114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
24116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
24117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
24120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
24121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
24122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
24124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
24128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
24129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
24130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
24132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
24151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
24152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
24153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
24157 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
24158 in the current target ISA, to allow the user to compile particular modules
24159 with target-specific options that differ from the command-line options.  */
24162 ix86_init_mmx_sse_builtins (void)
24164 const struct builtin_description *d;
24165 enum ix86_builtin_func_type ftype;
24168 /* Add all special builtins with a variable number of operands.  */
24169 for (i = 0, d = bdesc_special_args;
24170 i < ARRAY_SIZE (bdesc_special_args);
24176 ftype = (enum ix86_builtin_func_type) d->flag;
24177 def_builtin (d->mask, d->name, ftype, d->code);
24180 /* Add all builtins with a variable number of operands.  */
24181 for (i = 0, d = bdesc_args;
24182 i < ARRAY_SIZE (bdesc_args);
24188 ftype = (enum ix86_builtin_func_type) d->flag;
24189 def_builtin_const (d->mask, d->name, ftype, d->code);
24192 /* pcmpestr[im] insns. */
24193 for (i = 0, d = bdesc_pcmpestr;
24194 i < ARRAY_SIZE (bdesc_pcmpestr);
24197 if (d->code == IX86_BUILTIN_PCMPESTRM128)
24198 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
24200 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
24201 def_builtin_const (d->mask, d->name, ftype, d->code);
24204 /* pcmpistr[im] insns. */
24205 for (i = 0, d = bdesc_pcmpistr;
24206 i < ARRAY_SIZE (bdesc_pcmpistr);
24209 if (d->code == IX86_BUILTIN_PCMPISTRM128)
24210 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
24212 ftype = INT_FTYPE_V16QI_V16QI_INT;
24213 def_builtin_const (d->mask, d->name, ftype, d->code);
24216 /* comi/ucomi insns. */
24217 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24219 if (d->mask == OPTION_MASK_ISA_SSE2)
24220 ftype = INT_FTYPE_V2DF_V2DF;
24222 ftype = INT_FTYPE_V4SF_V4SF;
24223 def_builtin_const (d->mask, d->name, ftype, d->code);
24227 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
24228 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
24229 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
24230 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
24232 /* SSE or 3DNow!A */
24233 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24234 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
24235 IX86_BUILTIN_MASKMOVQ);
24238 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
24239 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
24241 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
24242 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
24243 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
24244 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
24247 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
24248 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
24249 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
24250 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
24253 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
24254 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
24255 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
24256 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
24257 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
24258 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
24259 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
24260 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
24261 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
24262 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
24263 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
24264 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
24267 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
24268 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
24270 /* MMX access to the vec_init patterns. */
24271 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
24272 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
24274 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
24275 V4HI_FTYPE_HI_HI_HI_HI,
24276 IX86_BUILTIN_VEC_INIT_V4HI);
24278 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
24279 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
24280 IX86_BUILTIN_VEC_INIT_V8QI);
24282 /* Access to the vec_extract patterns. */
24283 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
24284 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
24285 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
24286 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
24287 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
24288 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
24289 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
24290 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
24291 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
24292 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
24294 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24295 "__builtin_ia32_vec_ext_v4hi",
24296 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
24298 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
24299 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
24301 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
24302 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
24304 /* Access to the vec_set patterns. */
24305 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
24306 "__builtin_ia32_vec_set_v2di",
24307 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
24309 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
24310 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
24312 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
24313 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
24315 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
24316 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
24318 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24319 "__builtin_ia32_vec_set_v4hi",
24320 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
24322 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
24323 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
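/* Usage sketch (illustrative): the vec_init/vec_ext/vec_set builtins above
   give direct access to the element-wise vector patterns, e.g.

     float third_element (__v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 2);   // element 2 of v
     }
*/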
24325 /* Add the FMA4 and XOP multi-argument builtins.  */
24326 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24331 ftype = (enum ix86_builtin_func_type) d->flag;
24332 def_builtin_const (d->mask, d->name, ftype, d->code);
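/* For illustration: for the bdesc_multi_arg row describing
   __builtin_ia32_vpmacsdd, the loop above amounts to

     def_builtin_const (OPTION_MASK_ISA_XOP, "__builtin_ia32_vpmacsdd",
			V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_VPMACSDD);
*/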
24336 /* Internal method for ix86_init_builtins. */
24339 ix86_init_builtins_va_builtins_abi (void)
24341 tree ms_va_ref, sysv_va_ref;
24342 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24343 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24344 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24345 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
24349 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24350 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24351 ms_va_ref = build_reference_type (ms_va_list_type_node);
24353 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
24356 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24357 fnvoid_va_start_ms =
24358 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24359 fnvoid_va_end_sysv =
24360 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24361 fnvoid_va_start_sysv =
24362 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24364 fnvoid_va_copy_ms =
24365 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24367 fnvoid_va_copy_sysv =
24368 build_function_type_list (void_type_node, sysv_va_ref,
24369 sysv_va_ref, NULL_TREE);
24371 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24372 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24373 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24374 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24375 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24376 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24377 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24378 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24379 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24380 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24381 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24382 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
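/* Usage sketch (illustrative, assuming an x86-64 target): these builtins let
   code compiled for one ABI walk the other ABI's variable arguments, e.g.

     int __attribute__((ms_abi)) sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/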
24386 ix86_init_builtin_types (void)
24388 tree float128_type_node, float80_type_node;
24390 /* The __float80 type. */
24391 float80_type_node = long_double_type_node;
24392 if (TYPE_MODE (float80_type_node) != XFmode)
24394 /* long double is not XFmode here; build a distinct __float80 type.  */
24395 float80_type_node = make_node (REAL_TYPE);
24397 TYPE_PRECISION (float80_type_node) = 80;
24398 layout_type (float80_type_node);
24400 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
24402 /* The __float128 type. */
24403 float128_type_node = make_node (REAL_TYPE);
24404 TYPE_PRECISION (float128_type_node) = 128;
24405 layout_type (float128_type_node);
24406 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
24408 /* This macro is built by i386-builtin-types.awk. */
24409 DEFINE_BUILTIN_PRIMITIVE_TYPES;
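/* Illustrative sketch: once registered, both extended types are usable
   directly from C, e.g.

     __float80  x = 1.0w;   // XFmode, 80-bit extended precision
     __float128 y = 1.0q;   // TFmode, 128-bit precision
*/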
24413 ix86_init_builtins (void)
24417 ix86_init_builtin_types ();
24419 /* TFmode support builtins. */
24420 def_builtin_const (0, "__builtin_infq",
24421 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
24422 def_builtin_const (0, "__builtin_huge_valq",
24423 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
24425 /* We will expand them to normal calls if SSE2 isn't available, since
24426 they are used by libgcc.  */
24427 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
24428 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
24429 BUILT_IN_MD, "__fabstf2", NULL_TREE);
24430 TREE_READONLY (t) = 1;
24431 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
24433 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
24434 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
24435 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
24436 TREE_READONLY (t) = 1;
24437 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
24439 ix86_init_mmx_sse_builtins ();
24442 ix86_init_builtins_va_builtins_abi ();
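/* Usage sketch (illustrative): the TFmode builtins registered above fall
   back to their libgcc library names when no inline expansion is possible,
   e.g.

     __float128 magnitude (__float128 x)
     {
       return __builtin_fabsq (x);   // may become a call to __fabstf2
     }
*/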
24445 /* Return the ix86 builtin for CODE. */
24448 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24450 if (code >= IX86_BUILTIN_MAX)
24451 return error_mark_node;
24453 return ix86_builtins[code];
24456 /* Errors in the source file can cause expand_expr to return const0_rtx
24457 where we expect a vector. To avoid crashing, use one of the vector
24458 clear instructions. */
24460 safe_vector_operand (rtx x, enum machine_mode mode)
24462 if (x == const0_rtx)
24463 x = CONST0_RTX (mode);
24467 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24470 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24473 tree arg0 = CALL_EXPR_ARG (exp, 0);
24474 tree arg1 = CALL_EXPR_ARG (exp, 1);
24475 rtx op0 = expand_normal (arg0);
24476 rtx op1 = expand_normal (arg1);
24477 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24478 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24479 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24481 if (VECTOR_MODE_P (mode0))
24482 op0 = safe_vector_operand (op0, mode0);
24483 if (VECTOR_MODE_P (mode1))
24484 op1 = safe_vector_operand (op1, mode1);
24486 if (optimize || !target
24487 || GET_MODE (target) != tmode
24488 || !insn_data[icode].operand[0].predicate (target, tmode))
24489 target = gen_reg_rtx (tmode);
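  /* Some patterns (e.g. the SSE2 vector shifts) declare a TImode count
     operand while the builtin prototype passes SImode; load the SImode
     value into the low element of a V4SImode register and view it as
     TImode.  */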
24491 if (GET_MODE (op1) == SImode && mode1 == TImode)
24493 rtx x = gen_reg_rtx (V4SImode);
24494 emit_insn (gen_sse2_loadd (x, op1));
24495 op1 = gen_lowpart (TImode, x);
24498 if (!insn_data[icode].operand[1].predicate (op0, mode0))
24499 op0 = copy_to_mode_reg (mode0, op0);
24500 if (!insn_data[icode].operand[2].predicate (op1, mode1))
24501 op1 = copy_to_mode_reg (mode1, op1);
24503 pat = GEN_FCN (icode) (target, op0, op1);
24512 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24515 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24516 enum ix86_builtin_func_type m_type,
24517 enum rtx_code sub_code)
24522 bool comparison_p = false;
24524 bool last_arg_constant = false;
24525 int num_memory = 0;
24528 enum machine_mode mode;
24531 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24535 case MULTI_ARG_4_DF2_DI_I:
24536 case MULTI_ARG_4_DF2_DI_I1:
24537 case MULTI_ARG_4_SF2_SI_I:
24538 case MULTI_ARG_4_SF2_SI_I1:
24540 last_arg_constant = true;
24543 case MULTI_ARG_3_SF:
24544 case MULTI_ARG_3_DF:
24545 case MULTI_ARG_3_SF2:
24546 case MULTI_ARG_3_DF2:
24547 case MULTI_ARG_3_DI:
24548 case MULTI_ARG_3_SI:
24549 case MULTI_ARG_3_SI_DI:
24550 case MULTI_ARG_3_HI:
24551 case MULTI_ARG_3_HI_SI:
24552 case MULTI_ARG_3_QI:
24553 case MULTI_ARG_3_DI2:
24554 case MULTI_ARG_3_SI2:
24555 case MULTI_ARG_3_HI2:
24556 case MULTI_ARG_3_QI2:
24560 case MULTI_ARG_2_SF:
24561 case MULTI_ARG_2_DF:
24562 case MULTI_ARG_2_DI:
24563 case MULTI_ARG_2_SI:
24564 case MULTI_ARG_2_HI:
24565 case MULTI_ARG_2_QI:
24569 case MULTI_ARG_2_DI_IMM:
24570 case MULTI_ARG_2_SI_IMM:
24571 case MULTI_ARG_2_HI_IMM:
24572 case MULTI_ARG_2_QI_IMM:
24574 last_arg_constant = true;
24577 case MULTI_ARG_1_SF:
24578 case MULTI_ARG_1_DF:
24579 case MULTI_ARG_1_SF2:
24580 case MULTI_ARG_1_DF2:
24581 case MULTI_ARG_1_DI:
24582 case MULTI_ARG_1_SI:
24583 case MULTI_ARG_1_HI:
24584 case MULTI_ARG_1_QI:
24585 case MULTI_ARG_1_SI_DI:
24586 case MULTI_ARG_1_HI_DI:
24587 case MULTI_ARG_1_HI_SI:
24588 case MULTI_ARG_1_QI_DI:
24589 case MULTI_ARG_1_QI_SI:
24590 case MULTI_ARG_1_QI_HI:
24594 case MULTI_ARG_2_DI_CMP:
24595 case MULTI_ARG_2_SI_CMP:
24596 case MULTI_ARG_2_HI_CMP:
24597 case MULTI_ARG_2_QI_CMP:
24599 comparison_p = true;
24602 case MULTI_ARG_2_SF_TF:
24603 case MULTI_ARG_2_DF_TF:
24604 case MULTI_ARG_2_DI_TF:
24605 case MULTI_ARG_2_SI_TF:
24606 case MULTI_ARG_2_HI_TF:
24607 case MULTI_ARG_2_QI_TF:
24613 gcc_unreachable ();
24616 if (optimize || !target
24617 || GET_MODE (target) != tmode
24618 || !insn_data[icode].operand[0].predicate (target, tmode))
24619 target = gen_reg_rtx (tmode);
24621 gcc_assert (nargs <= 4);
24623 for (i = 0; i < nargs; i++)
24625 tree arg = CALL_EXPR_ARG (exp, i);
24626 rtx op = expand_normal (arg);
24627 int adjust = (comparison_p) ? 1 : 0;
24628 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24630 if (last_arg_constant && i == nargs-1)
24632 if (!CONST_INT_P (op))
24634 error ("last argument must be an immediate");
24635 return gen_reg_rtx (tmode);
24640 if (VECTOR_MODE_P (mode))
24641 op = safe_vector_operand (op, mode);
24643 /* If we aren't optimizing, only allow one memory operand to be generated.  */
24645 if (memory_operand (op, mode))
24648 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24651 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24653 op = force_reg (mode, op);
24657 args[i].mode = mode;
24663 pat = GEN_FCN (icode) (target, args[0].op);
24668 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24669 GEN_INT ((int)sub_code));
24670 else if (! comparison_p)
24671 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24674 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24678 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24683 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24687 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24691 gcc_unreachable ();
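/* Editorial sketch of the comparison_p path above: the insn pattern takes
   the comparison rtx as an explicit operand, so __builtin_ia32_vpcomltb is
   expanded roughly as

     pat = GEN_FCN (CODE_FOR_xop_maskcmpv16qi3)
	     (target, gen_rtx_fmt_ee (LT, V16QImode, op0, op1), op0, op1);
*/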
24701 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24702 insns with vec_merge. */
24705 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24709 tree arg0 = CALL_EXPR_ARG (exp, 0);
24710 rtx op1, op0 = expand_normal (arg0);
24711 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24712 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24714 if (optimize || !target
24715 || GET_MODE (target) != tmode
24716 || !insn_data[icode].operand[0].predicate (target, tmode))
24717 target = gen_reg_rtx (tmode);
24719 if (VECTOR_MODE_P (mode0))
24720 op0 = safe_vector_operand (op0, mode0);
24722 if ((optimize && !register_operand (op0, mode0))
24723 || !insn_data[icode].operand[1].predicate (op0, mode0))
24724 op0 = copy_to_mode_reg (mode0, op0);
24727 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24728 op1 = copy_to_mode_reg (mode0, op1);
24730 pat = GEN_FCN (icode) (target, op0, op1);
24737 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24740 ix86_expand_sse_compare (const struct builtin_description *d,
24741 tree exp, rtx target, bool swap)
24744 tree arg0 = CALL_EXPR_ARG (exp, 0);
24745 tree arg1 = CALL_EXPR_ARG (exp, 1);
24746 rtx op0 = expand_normal (arg0);
24747 rtx op1 = expand_normal (arg1);
24749 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24750 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24751 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24752 enum rtx_code comparison = d->comparison;
24754 if (VECTOR_MODE_P (mode0))
24755 op0 = safe_vector_operand (op0, mode0);
24756 if (VECTOR_MODE_P (mode1))
24757 op1 = safe_vector_operand (op1, mode1);
24759 /* Swap operands if we have a comparison that isn't available in hardware.  */
24763 rtx tmp = gen_reg_rtx (mode1);
24764 emit_move_insn (tmp, op1);
24769 if (optimize || !target
24770 || GET_MODE (target) != tmode
24771 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24772 target = gen_reg_rtx (tmode);
24774 if ((optimize && !register_operand (op0, mode0))
24775 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24776 op0 = copy_to_mode_reg (mode0, op0);
24777 if ((optimize && !register_operand (op1, mode1))
24778 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24779 op1 = copy_to_mode_reg (mode1, op1);
24781 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24782 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24789 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24792 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24796 tree arg0 = CALL_EXPR_ARG (exp, 0);
24797 tree arg1 = CALL_EXPR_ARG (exp, 1);
24798 rtx op0 = expand_normal (arg0);
24799 rtx op1 = expand_normal (arg1);
24800 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24801 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24802 enum rtx_code comparison = d->comparison;
24804 if (VECTOR_MODE_P (mode0))
24805 op0 = safe_vector_operand (op0, mode0);
24806 if (VECTOR_MODE_P (mode1))
24807 op1 = safe_vector_operand (op1, mode1);
24809 /* Swap operands if we have a comparison that isn't available in hardware.  */
24811 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24818 target = gen_reg_rtx (SImode);
24819 emit_move_insn (target, const0_rtx);
24820 target = gen_rtx_SUBREG (QImode, target, 0);
24822 if ((optimize && !register_operand (op0, mode0))
24823 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24824 op0 = copy_to_mode_reg (mode0, op0);
24825 if ((optimize && !register_operand (op1, mode1))
24826 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24827 op1 = copy_to_mode_reg (mode1, op1);
24829 pat = GEN_FCN (d->icode) (op0, op1);
24833 emit_insn (gen_rtx_SET (VOIDmode,
24834 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24835 gen_rtx_fmt_ee (comparison, QImode,
24839 return SUBREG_REG (target);
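/* Illustrative sketch (not authoritative): after the comi insn sets the
   flags, the code above materializes the boolean roughly as

     (set (strict_low_part (subreg:QI target 0))
	  (lt:QI (reg FLAGS) (const_int 0)))

   i.e. a setcc into the low byte of a zeroed SImode pseudo, whose SImode
   parent register is what gets returned.  */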
24842 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24845 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24849 tree arg0 = CALL_EXPR_ARG (exp, 0);
24850 tree arg1 = CALL_EXPR_ARG (exp, 1);
24851 rtx op0 = expand_normal (arg0);
24852 rtx op1 = expand_normal (arg1);
24853 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24854 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24855 enum rtx_code comparison = d->comparison;
24857 if (VECTOR_MODE_P (mode0))
24858 op0 = safe_vector_operand (op0, mode0);
24859 if (VECTOR_MODE_P (mode1))
24860 op1 = safe_vector_operand (op1, mode1);
24862 target = gen_reg_rtx (SImode);
24863 emit_move_insn (target, const0_rtx);
24864 target = gen_rtx_SUBREG (QImode, target, 0);
24866 if ((optimize && !register_operand (op0, mode0))
24867 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24868 op0 = copy_to_mode_reg (mode0, op0);
24869 if ((optimize && !register_operand (op1, mode1))
24870 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24871 op1 = copy_to_mode_reg (mode1, op1);
24873 pat = GEN_FCN (d->icode) (op0, op1);
24877 emit_insn (gen_rtx_SET (VOIDmode,
24878 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24879 gen_rtx_fmt_ee (comparison, QImode,
24883 return SUBREG_REG (target);
24886 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24889 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24890 tree exp, rtx target)
24893 tree arg0 = CALL_EXPR_ARG (exp, 0);
24894 tree arg1 = CALL_EXPR_ARG (exp, 1);
24895 tree arg2 = CALL_EXPR_ARG (exp, 2);
24896 tree arg3 = CALL_EXPR_ARG (exp, 3);
24897 tree arg4 = CALL_EXPR_ARG (exp, 4);
24898 rtx scratch0, scratch1;
24899 rtx op0 = expand_normal (arg0);
24900 rtx op1 = expand_normal (arg1);
24901 rtx op2 = expand_normal (arg2);
24902 rtx op3 = expand_normal (arg3);
24903 rtx op4 = expand_normal (arg4);
24904 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24906 tmode0 = insn_data[d->icode].operand[0].mode;
24907 tmode1 = insn_data[d->icode].operand[1].mode;
24908 modev2 = insn_data[d->icode].operand[2].mode;
24909 modei3 = insn_data[d->icode].operand[3].mode;
24910 modev4 = insn_data[d->icode].operand[4].mode;
24911 modei5 = insn_data[d->icode].operand[5].mode;
24912 modeimm = insn_data[d->icode].operand[6].mode;
24914 if (VECTOR_MODE_P (modev2))
24915 op0 = safe_vector_operand (op0, modev2);
24916 if (VECTOR_MODE_P (modev4))
24917 op2 = safe_vector_operand (op2, modev4);
24919 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24920 op0 = copy_to_mode_reg (modev2, op0);
24921 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24922 op1 = copy_to_mode_reg (modei3, op1);
24923 if ((optimize && !register_operand (op2, modev4))
24924 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24925 op2 = copy_to_mode_reg (modev4, op2);
24926 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24927 op3 = copy_to_mode_reg (modei5, op3);
24929 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24931 error ("the fifth argument must be a 8-bit immediate");
24935 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24937 if (optimize || !target
24938 || GET_MODE (target) != tmode0
24939 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24940 target = gen_reg_rtx (tmode0);
24942 scratch1 = gen_reg_rtx (tmode1);
24944 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24946 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24948 if (optimize || !target
24949 || GET_MODE (target) != tmode1
24950 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24951 target = gen_reg_rtx (tmode1);
24953 scratch0 = gen_reg_rtx (tmode0);
24955 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24959 gcc_assert (d->flag);
24961 scratch0 = gen_reg_rtx (tmode0);
24962 scratch1 = gen_reg_rtx (tmode1);
24964 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24974 target = gen_reg_rtx (SImode);
24975 emit_move_insn (target, const0_rtx);
24976 target = gen_rtx_SUBREG (QImode, target, 0);
24979 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24980 gen_rtx_fmt_ee (EQ, QImode,
24981 gen_rtx_REG ((enum machine_mode) d->flag,
24984 return SUBREG_REG (target);
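/* Illustrative sketch (not part of the build): the explicit-length
   string-compare builtins are normally reached through the SSE4.2
   intrinsics, e.g.

     #include <nmmintrin.h>
     int first_match (__m128i a, int la, __m128i b, int lb)
     {
       return _mm_cmpestri (a, la, b, lb, _SIDD_CMP_EQUAL_ORDERED);
     }

   The two vectors and two lengths become op0..op3 above; the mode
   argument becomes op4 and must be an 8-bit compile-time constant,
   as enforced by the predicate check.  */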
24991 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24994 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24995 tree exp, rtx target)
24998 tree arg0 = CALL_EXPR_ARG (exp, 0);
24999 tree arg1 = CALL_EXPR_ARG (exp, 1);
25000 tree arg2 = CALL_EXPR_ARG (exp, 2);
25001 rtx scratch0, scratch1;
25002 rtx op0 = expand_normal (arg0);
25003 rtx op1 = expand_normal (arg1);
25004 rtx op2 = expand_normal (arg2);
25005 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
25007 tmode0 = insn_data[d->icode].operand[0].mode;
25008 tmode1 = insn_data[d->icode].operand[1].mode;
25009 modev2 = insn_data[d->icode].operand[2].mode;
25010 modev3 = insn_data[d->icode].operand[3].mode;
25011 modeimm = insn_data[d->icode].operand[4].mode;
25013 if (VECTOR_MODE_P (modev2))
25014 op0 = safe_vector_operand (op0, modev2);
25015 if (VECTOR_MODE_P (modev3))
25016 op1 = safe_vector_operand (op1, modev3);
25018 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25019 op0 = copy_to_mode_reg (modev2, op0);
25020 if ((optimize && !register_operand (op1, modev3))
25021 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
25022 op1 = copy_to_mode_reg (modev3, op1);
25024 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
25026 error ("the third argument must be an 8-bit immediate");
25030 if (d->code == IX86_BUILTIN_PCMPISTRI128)
25032 if (optimize || !target
25033 || GET_MODE (target) != tmode0
25034 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
25035 target = gen_reg_rtx (tmode0);
25037 scratch1 = gen_reg_rtx (tmode1);
25039 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
25041 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
25043 if (optimize || !target
25044 || GET_MODE (target) != tmode1
25045 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25046 target = gen_reg_rtx (tmode1);
25048 scratch0 = gen_reg_rtx (tmode0);
25050 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
25054 gcc_assert (d->flag);
25056 scratch0 = gen_reg_rtx (tmode0);
25057 scratch1 = gen_reg_rtx (tmode1);
25059 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
25069 target = gen_reg_rtx (SImode);
25070 emit_move_insn (target, const0_rtx);
25071 target = gen_rtx_SUBREG (QImode, target, 0);
25074 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25075 gen_rtx_fmt_ee (EQ, QImode,
25076 gen_rtx_REG ((enum machine_mode) d->flag,
25079 return SUBREG_REG (target);
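/* Illustrative sketch (not part of the build): the implicit-length
   variant infers the string lengths from NUL terminators, so only
   three arguments reach this expander, e.g.

     #include <nmmintrin.h>
     int first_equal (__m128i a, __m128i b)
     {
       return _mm_cmpistri (a, b, _SIDD_CMP_EQUAL_EACH);
     }

   As above, the trailing mode argument must be an 8-bit immediate.  */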
25085 /* Subroutine of ix86_expand_builtin to take care of insns with
25086 a variable number of operands. */
25089 ix86_expand_args_builtin (const struct builtin_description *d,
25090 tree exp, rtx target)
25092 rtx pat, real_target;
25093 unsigned int i, nargs;
25094 unsigned int nargs_constant = 0;
25095 int num_memory = 0;
25099 enum machine_mode mode;
25101 bool last_arg_count = false;
25102 enum insn_code icode = d->icode;
25103 const struct insn_data_d *insn_p = &insn_data[icode];
25104 enum machine_mode tmode = insn_p->operand[0].mode;
25105 enum machine_mode rmode = VOIDmode;
25107 enum rtx_code comparison = d->comparison;
25109 switch ((enum ix86_builtin_func_type) d->flag)
25111 case INT_FTYPE_V8SF_V8SF_PTEST:
25112 case INT_FTYPE_V4DI_V4DI_PTEST:
25113 case INT_FTYPE_V4DF_V4DF_PTEST:
25114 case INT_FTYPE_V4SF_V4SF_PTEST:
25115 case INT_FTYPE_V2DI_V2DI_PTEST:
25116 case INT_FTYPE_V2DF_V2DF_PTEST:
25117 return ix86_expand_sse_ptest (d, exp, target);
25118 case FLOAT128_FTYPE_FLOAT128:
25119 case FLOAT_FTYPE_FLOAT:
25120 case INT_FTYPE_INT:
25121 case UINT64_FTYPE_INT:
25122 case UINT16_FTYPE_UINT16:
25123 case INT64_FTYPE_INT64:
25124 case INT64_FTYPE_V4SF:
25125 case INT64_FTYPE_V2DF:
25126 case INT_FTYPE_V16QI:
25127 case INT_FTYPE_V8QI:
25128 case INT_FTYPE_V8SF:
25129 case INT_FTYPE_V4DF:
25130 case INT_FTYPE_V4SF:
25131 case INT_FTYPE_V2DF:
25132 case V16QI_FTYPE_V16QI:
25133 case V8SI_FTYPE_V8SF:
25134 case V8SI_FTYPE_V4SI:
25135 case V8HI_FTYPE_V8HI:
25136 case V8HI_FTYPE_V16QI:
25137 case V8QI_FTYPE_V8QI:
25138 case V8SF_FTYPE_V8SF:
25139 case V8SF_FTYPE_V8SI:
25140 case V8SF_FTYPE_V4SF:
25141 case V8SF_FTYPE_V8HI:
25142 case V4SI_FTYPE_V4SI:
25143 case V4SI_FTYPE_V16QI:
25144 case V4SI_FTYPE_V4SF:
25145 case V4SI_FTYPE_V8SI:
25146 case V4SI_FTYPE_V8HI:
25147 case V4SI_FTYPE_V4DF:
25148 case V4SI_FTYPE_V2DF:
25149 case V4HI_FTYPE_V4HI:
25150 case V4DF_FTYPE_V4DF:
25151 case V4DF_FTYPE_V4SI:
25152 case V4DF_FTYPE_V4SF:
25153 case V4DF_FTYPE_V2DF:
25154 case V4SF_FTYPE_V4SF:
25155 case V4SF_FTYPE_V4SI:
25156 case V4SF_FTYPE_V8SF:
25157 case V4SF_FTYPE_V4DF:
25158 case V4SF_FTYPE_V8HI:
25159 case V4SF_FTYPE_V2DF:
25160 case V2DI_FTYPE_V2DI:
25161 case V2DI_FTYPE_V16QI:
25162 case V2DI_FTYPE_V8HI:
25163 case V2DI_FTYPE_V4SI:
25164 case V2DF_FTYPE_V2DF:
25165 case V2DF_FTYPE_V4SI:
25166 case V2DF_FTYPE_V4DF:
25167 case V2DF_FTYPE_V4SF:
25168 case V2DF_FTYPE_V2SI:
25169 case V2SI_FTYPE_V2SI:
25170 case V2SI_FTYPE_V4SF:
25171 case V2SI_FTYPE_V2SF:
25172 case V2SI_FTYPE_V2DF:
25173 case V2SF_FTYPE_V2SF:
25174 case V2SF_FTYPE_V2SI:
25177 case V4SF_FTYPE_V4SF_VEC_MERGE:
25178 case V2DF_FTYPE_V2DF_VEC_MERGE:
25179 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25180 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25181 case V16QI_FTYPE_V16QI_V16QI:
25182 case V16QI_FTYPE_V8HI_V8HI:
25183 case V8QI_FTYPE_V8QI_V8QI:
25184 case V8QI_FTYPE_V4HI_V4HI:
25185 case V8HI_FTYPE_V8HI_V8HI:
25186 case V8HI_FTYPE_V16QI_V16QI:
25187 case V8HI_FTYPE_V4SI_V4SI:
25188 case V8SF_FTYPE_V8SF_V8SF:
25189 case V8SF_FTYPE_V8SF_V8SI:
25190 case V4SI_FTYPE_V4SI_V4SI:
25191 case V4SI_FTYPE_V8HI_V8HI:
25192 case V4SI_FTYPE_V4SF_V4SF:
25193 case V4SI_FTYPE_V2DF_V2DF:
25194 case V4HI_FTYPE_V4HI_V4HI:
25195 case V4HI_FTYPE_V8QI_V8QI:
25196 case V4HI_FTYPE_V2SI_V2SI:
25197 case V4DF_FTYPE_V4DF_V4DF:
25198 case V4DF_FTYPE_V4DF_V4DI:
25199 case V4SF_FTYPE_V4SF_V4SF:
25200 case V4SF_FTYPE_V4SF_V4SI:
25201 case V4SF_FTYPE_V4SF_V2SI:
25202 case V4SF_FTYPE_V4SF_V2DF:
25203 case V4SF_FTYPE_V4SF_DI:
25204 case V4SF_FTYPE_V4SF_SI:
25205 case V2DI_FTYPE_V2DI_V2DI:
25206 case V2DI_FTYPE_V16QI_V16QI:
25207 case V2DI_FTYPE_V4SI_V4SI:
25208 case V2DI_FTYPE_V2DI_V16QI:
25209 case V2DI_FTYPE_V2DF_V2DF:
25210 case V2SI_FTYPE_V2SI_V2SI:
25211 case V2SI_FTYPE_V4HI_V4HI:
25212 case V2SI_FTYPE_V2SF_V2SF:
25213 case V2DF_FTYPE_V2DF_V2DF:
25214 case V2DF_FTYPE_V2DF_V4SF:
25215 case V2DF_FTYPE_V2DF_V2DI:
25216 case V2DF_FTYPE_V2DF_DI:
25217 case V2DF_FTYPE_V2DF_SI:
25218 case V2SF_FTYPE_V2SF_V2SF:
25219 case V1DI_FTYPE_V1DI_V1DI:
25220 case V1DI_FTYPE_V8QI_V8QI:
25221 case V1DI_FTYPE_V2SI_V2SI:
25222 if (comparison == UNKNOWN)
25223 return ix86_expand_binop_builtin (icode, exp, target);
25226 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25227 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25228 gcc_assert (comparison != UNKNOWN);
25232 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25233 case V8HI_FTYPE_V8HI_SI_COUNT:
25234 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25235 case V4SI_FTYPE_V4SI_SI_COUNT:
25236 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25237 case V4HI_FTYPE_V4HI_SI_COUNT:
25238 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25239 case V2DI_FTYPE_V2DI_SI_COUNT:
25240 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25241 case V2SI_FTYPE_V2SI_SI_COUNT:
25242 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25243 case V1DI_FTYPE_V1DI_SI_COUNT:
25245 last_arg_count = true;
25247 case UINT64_FTYPE_UINT64_UINT64:
25248 case UINT_FTYPE_UINT_UINT:
25249 case UINT_FTYPE_UINT_USHORT:
25250 case UINT_FTYPE_UINT_UCHAR:
25251 case UINT16_FTYPE_UINT16_INT:
25252 case UINT8_FTYPE_UINT8_INT:
25255 case V2DI_FTYPE_V2DI_INT_CONVERT:
25258 nargs_constant = 1;
25260 case V8HI_FTYPE_V8HI_INT:
25261 case V8HI_FTYPE_V8SF_INT:
25262 case V8HI_FTYPE_V4SF_INT:
25263 case V8SF_FTYPE_V8SF_INT:
25264 case V4SI_FTYPE_V4SI_INT:
25265 case V4SI_FTYPE_V8SI_INT:
25266 case V4HI_FTYPE_V4HI_INT:
25267 case V4DF_FTYPE_V4DF_INT:
25268 case V4SF_FTYPE_V4SF_INT:
25269 case V4SF_FTYPE_V8SF_INT:
25270 case V2DI_FTYPE_V2DI_INT:
25271 case V2DF_FTYPE_V2DF_INT:
25272 case V2DF_FTYPE_V4DF_INT:
25274 nargs_constant = 1;
25276 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25277 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25278 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25279 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25280 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25283 case V16QI_FTYPE_V16QI_V16QI_INT:
25284 case V8HI_FTYPE_V8HI_V8HI_INT:
25285 case V8SI_FTYPE_V8SI_V8SI_INT:
25286 case V8SI_FTYPE_V8SI_V4SI_INT:
25287 case V8SF_FTYPE_V8SF_V8SF_INT:
25288 case V8SF_FTYPE_V8SF_V4SF_INT:
25289 case V4SI_FTYPE_V4SI_V4SI_INT:
25290 case V4DF_FTYPE_V4DF_V4DF_INT:
25291 case V4DF_FTYPE_V4DF_V2DF_INT:
25292 case V4SF_FTYPE_V4SF_V4SF_INT:
25293 case V2DI_FTYPE_V2DI_V2DI_INT:
25294 case V2DF_FTYPE_V2DF_V2DF_INT:
25296 nargs_constant = 1;
25298 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25301 nargs_constant = 1;
25303 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25306 nargs_constant = 1;
25308 case V2DI_FTYPE_V2DI_UINT_UINT:
25310 nargs_constant = 2;
25312 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25313 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25314 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25315 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25317 nargs_constant = 1;
25319 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25321 nargs_constant = 2;
25324 gcc_unreachable ();
25327 gcc_assert (nargs <= ARRAY_SIZE (args));
25329 if (comparison != UNKNOWN)
25331 gcc_assert (nargs == 2);
25332 return ix86_expand_sse_compare (d, exp, target, swap);
25335 if (rmode == VOIDmode || rmode == tmode)
25339 || GET_MODE (target) != tmode
25340 || !insn_p->operand[0].predicate (target, tmode))
25341 target = gen_reg_rtx (tmode);
25342 real_target = target;
25346 target = gen_reg_rtx (rmode);
25347 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25350 for (i = 0; i < nargs; i++)
25352 tree arg = CALL_EXPR_ARG (exp, i);
25353 rtx op = expand_normal (arg);
25354 enum machine_mode mode = insn_p->operand[i + 1].mode;
25355 bool match = insn_p->operand[i + 1].predicate (op, mode);
25357 if (last_arg_count && (i + 1) == nargs)
25359 /* SIMD shift insns take either an 8-bit immediate or a
25360 register as the count. But the builtin functions take an int
25361 as the count. If the count doesn't match, put it in a register. */
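/* Illustrative sketch (not part of the build): given

     __m128i f (__m128i x, int n) { return _mm_slli_epi32 (x, n); }

   with a non-constant N, the immediate predicate fails, so the code
   below narrows the count to SImode if necessary and copies it into
   a register, matching the register form of the shift insn.  */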
25364 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25365 if (!insn_p->operand[i + 1].predicate (op, mode))
25366 op = copy_to_reg (op);
25369 else if ((nargs - i) <= nargs_constant)
25374 case CODE_FOR_sse4_1_roundpd:
25375 case CODE_FOR_sse4_1_roundps:
25376 case CODE_FOR_sse4_1_roundsd:
25377 case CODE_FOR_sse4_1_roundss:
25378 case CODE_FOR_sse4_1_blendps:
25379 case CODE_FOR_avx_blendpd256:
25380 case CODE_FOR_avx_vpermilv4df:
25381 case CODE_FOR_avx_roundpd256:
25382 case CODE_FOR_avx_roundps256:
25383 error ("the last argument must be a 4-bit immediate");
25386 case CODE_FOR_sse4_1_blendpd:
25387 case CODE_FOR_avx_vpermilv2df:
25388 case CODE_FOR_xop_vpermil2v2df3:
25389 case CODE_FOR_xop_vpermil2v4sf3:
25390 case CODE_FOR_xop_vpermil2v4df3:
25391 case CODE_FOR_xop_vpermil2v8sf3:
25392 error ("the last argument must be a 2-bit immediate");
25395 case CODE_FOR_avx_vextractf128v4df:
25396 case CODE_FOR_avx_vextractf128v8sf:
25397 case CODE_FOR_avx_vextractf128v8si:
25398 case CODE_FOR_avx_vinsertf128v4df:
25399 case CODE_FOR_avx_vinsertf128v8sf:
25400 case CODE_FOR_avx_vinsertf128v8si:
25401 error ("the last argument must be a 1-bit immediate");
25404 case CODE_FOR_avx_cmpsdv2df3:
25405 case CODE_FOR_avx_cmpssv4sf3:
25406 case CODE_FOR_avx_cmppdv2df3:
25407 case CODE_FOR_avx_cmppsv4sf3:
25408 case CODE_FOR_avx_cmppdv4df3:
25409 case CODE_FOR_avx_cmppsv8sf3:
25410 error ("the last argument must be a 5-bit immediate");
25414 switch (nargs_constant)
25417 if ((nargs - i) == nargs_constant)
25419 error ("the next to last argument must be an 8-bit immediate");
25423 error ("the last argument must be an 8-bit immediate");
25426 gcc_unreachable ();
25433 if (VECTOR_MODE_P (mode))
25434 op = safe_vector_operand (op, mode);
25436 /* If we aren't optimizing, only allow one memory operand to be generated. */
25438 if (memory_operand (op, mode))
25441 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25443 if (optimize || !match || num_memory > 1)
25444 op = copy_to_mode_reg (mode, op);
25448 op = copy_to_reg (op);
25449 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25454 args[i].mode = mode;
25460 pat = GEN_FCN (icode) (real_target, args[0].op);
25463 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25466 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25470 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25471 args[2].op, args[3].op);
25474 gcc_unreachable ();
25484 /* Subroutine of ix86_expand_builtin to take care of special insns
25485 with a variable number of operands. */
25488 ix86_expand_special_args_builtin (const struct builtin_description *d,
25489 tree exp, rtx target)
25493 unsigned int i, nargs, arg_adjust, memory;
25497 enum machine_mode mode;
25499 enum insn_code icode = d->icode;
25500 bool last_arg_constant = false;
25501 const struct insn_data_d *insn_p = &insn_data[icode];
25502 enum machine_mode tmode = insn_p->operand[0].mode;
25503 enum { load, store } klass;
25505 switch ((enum ix86_builtin_func_type) d->flag)
25507 case VOID_FTYPE_VOID:
25508 emit_insn (GEN_FCN (icode) (target));
25510 case VOID_FTYPE_UINT64:
25511 case VOID_FTYPE_UNSIGNED:
25517 case UINT64_FTYPE_VOID:
25518 case UNSIGNED_FTYPE_VOID:
25519 case UINT16_FTYPE_VOID:
25524 case UINT64_FTYPE_PUNSIGNED:
25525 case V2DI_FTYPE_PV2DI:
25526 case V32QI_FTYPE_PCCHAR:
25527 case V16QI_FTYPE_PCCHAR:
25528 case V8SF_FTYPE_PCV4SF:
25529 case V8SF_FTYPE_PCFLOAT:
25530 case V4SF_FTYPE_PCFLOAT:
25531 case V4DF_FTYPE_PCV2DF:
25532 case V4DF_FTYPE_PCDOUBLE:
25533 case V2DF_FTYPE_PCDOUBLE:
25534 case VOID_FTYPE_PVOID:
25539 case VOID_FTYPE_PV2SF_V4SF:
25540 case VOID_FTYPE_PV4DI_V4DI:
25541 case VOID_FTYPE_PV2DI_V2DI:
25542 case VOID_FTYPE_PCHAR_V32QI:
25543 case VOID_FTYPE_PCHAR_V16QI:
25544 case VOID_FTYPE_PFLOAT_V8SF:
25545 case VOID_FTYPE_PFLOAT_V4SF:
25546 case VOID_FTYPE_PDOUBLE_V4DF:
25547 case VOID_FTYPE_PDOUBLE_V2DF:
25548 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25549 case VOID_FTYPE_PINT_INT:
25552 /* Reserve memory operand for target. */
25553 memory = ARRAY_SIZE (args);
25555 case V4SF_FTYPE_V4SF_PCV2SF:
25556 case V2DF_FTYPE_V2DF_PCDOUBLE:
25561 case V8SF_FTYPE_PCV8SF_V8SF:
25562 case V4DF_FTYPE_PCV4DF_V4DF:
25563 case V4SF_FTYPE_PCV4SF_V4SF:
25564 case V2DF_FTYPE_PCV2DF_V2DF:
25569 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25570 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25571 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25572 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25575 /* Reserve memory operand for target. */
25576 memory = ARRAY_SIZE (args);
25578 case VOID_FTYPE_UINT_UINT_UINT:
25579 case VOID_FTYPE_UINT64_UINT_UINT:
25580 case UCHAR_FTYPE_UINT_UINT_UINT:
25581 case UCHAR_FTYPE_UINT64_UINT_UINT:
25584 memory = ARRAY_SIZE (args);
25585 last_arg_constant = true;
25588 gcc_unreachable ();
25591 gcc_assert (nargs <= ARRAY_SIZE (args));
25593 if (klass == store)
25595 arg = CALL_EXPR_ARG (exp, 0);
25596 op = expand_normal (arg);
25597 gcc_assert (target == 0);
25599 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25601 target = force_reg (tmode, op);
25609 || GET_MODE (target) != tmode
25610 || !insn_p->operand[0].predicate (target, tmode))
25611 target = gen_reg_rtx (tmode);
25614 for (i = 0; i < nargs; i++)
25616 enum machine_mode mode = insn_p->operand[i + 1].mode;
25619 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25620 op = expand_normal (arg);
25621 match = insn_p->operand[i + 1].predicate (op, mode);
25623 if (last_arg_constant && (i + 1) == nargs)
25627 if (icode == CODE_FOR_lwp_lwpvalsi3
25628 || icode == CODE_FOR_lwp_lwpinssi3
25629 || icode == CODE_FOR_lwp_lwpvaldi3
25630 || icode == CODE_FOR_lwp_lwpinsdi3)
25631 error ("the last argument must be a 32-bit immediate");
25633 error ("the last argument must be an 8-bit immediate");
25641 /* This must be the memory operand. */
25642 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25643 gcc_assert (GET_MODE (op) == mode
25644 || GET_MODE (op) == VOIDmode);
25648 /* This must be a register. */
25649 if (VECTOR_MODE_P (mode))
25650 op = safe_vector_operand (op, mode);
25652 gcc_assert (GET_MODE (op) == mode
25653 || GET_MODE (op) == VOIDmode);
25654 op = copy_to_mode_reg (mode, op);
25659 args[i].mode = mode;
25665 pat = GEN_FCN (icode) (target);
25668 pat = GEN_FCN (icode) (target, args[0].op);
25671 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25674 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25677 gcc_unreachable ();
25683 return klass == store ? 0 : target;
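/* Illustrative sketch (not part of the build): a "store" klass entry
   such as VOID_FTYPE_PDOUBLE_V2DF is what, e.g.,

     #include <emmintrin.h>
     void put (double *p, __m128d v) { _mm_store_pd (p, v); }

   reaches: the pointer argument becomes the MEM target above, and
   the expander returns 0 because the builtin produces no value.  */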
25686 /* Return the integer constant in ARG. Constrain it to be in the range
25687 of the subparts of VEC_TYPE; issue an error if not. */
25690 get_element_number (tree vec_type, tree arg)
25692 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25694 if (!host_integerp (arg, 1)
25695 || (elt = tree_low_cst (arg, 1), elt > max))
25697 error ("selector must be an integer constant in the range 0..%wi", max);
25704 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25705 ix86_expand_vector_init. We DO have language-level syntax for this, in
25706 the form of (type){ init-list }. Except that since we can't place emms
25707 instructions from inside the compiler, we can't allow the use of MMX
25708 registers unless the user explicitly asks for it. So we do *not* define
25709 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25710 we have builtins invoked by mmintrin.h that give us license
25711 these sorts of instructions. */
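/* Illustrative sketch (not part of the build): mmintrin.h wrappers
   such as

     #include <mmintrin.h>
     __m64 quad (short a, short b, short c, short d)
     {
       return _mm_set_pi16 (a, b, c, d);
     }

   funnel into ix86_expand_vec_init_builtin below (here via
   IX86_BUILTIN_VEC_INIT_V4HI), which collects the scalar arguments
   into a PARALLEL and hands it to ix86_expand_vector_init.  */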
25714 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25716 enum machine_mode tmode = TYPE_MODE (type);
25717 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25718 int i, n_elt = GET_MODE_NUNITS (tmode);
25719 rtvec v = rtvec_alloc (n_elt);
25721 gcc_assert (VECTOR_MODE_P (tmode));
25722 gcc_assert (call_expr_nargs (exp) == n_elt);
25724 for (i = 0; i < n_elt; ++i)
25726 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25727 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25730 if (!target || !register_operand (target, tmode))
25731 target = gen_reg_rtx (tmode);
25733 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25737 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25738 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25739 had a language-level syntax for referencing vector elements. */
25742 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25744 enum machine_mode tmode, mode0;
25749 arg0 = CALL_EXPR_ARG (exp, 0);
25750 arg1 = CALL_EXPR_ARG (exp, 1);
25752 op0 = expand_normal (arg0);
25753 elt = get_element_number (TREE_TYPE (arg0), arg1);
25755 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25756 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25757 gcc_assert (VECTOR_MODE_P (mode0));
25759 op0 = force_reg (mode0, op0);
25761 if (optimize || !target || !register_operand (target, tmode))
25762 target = gen_reg_rtx (tmode);
25764 ix86_expand_vector_extract (true, target, op0, elt);
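/* Illustrative sketch (not part of the build): the extract builtins
   require a constant selector, validated by get_element_number; for
   a V4SF operand it must lie in the range 0..3:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));
     float lane1 (__v4sf v) { return __builtin_ia32_vec_ext_v4sf (v, 1); }

   An out-of-range selector is diagnosed rather than silently masked.  */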
25769 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25770 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25771 a language-level syntax for referencing vector elements. */
25774 ix86_expand_vec_set_builtin (tree exp)
25776 enum machine_mode tmode, mode1;
25777 tree arg0, arg1, arg2;
25779 rtx op0, op1, target;
25781 arg0 = CALL_EXPR_ARG (exp, 0);
25782 arg1 = CALL_EXPR_ARG (exp, 1);
25783 arg2 = CALL_EXPR_ARG (exp, 2);
25785 tmode = TYPE_MODE (TREE_TYPE (arg0));
25786 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25787 gcc_assert (VECTOR_MODE_P (tmode));
25789 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25790 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25791 elt = get_element_number (TREE_TYPE (arg0), arg2);
25793 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25794 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25796 op0 = force_reg (tmode, op0);
25797 op1 = force_reg (mode1, op1);
25799 /* OP0 is the source of these builtin functions and shouldn't be
25800 modified. Create a copy, use it, and return it as the target. */
25801 target = gen_reg_rtx (tmode);
25802 emit_move_insn (target, op0);
25803 ix86_expand_vector_set (true, target, op1, elt);
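/* Illustrative sketch (not part of the build): because the input
   vector is copied first, the source operand observably keeps its
   value.  With the SSE2 intrinsic

     #include <emmintrin.h>
     __m128i set3 (__m128i v, int x)
     {
       return _mm_insert_epi16 (v, x, 3);
     }

   V is left unchanged and the modified copy is returned, here via
   IX86_BUILTIN_VEC_SET_V8HI.  */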
25808 /* Expand an expression EXP that calls a built-in function,
25809 with result going to TARGET if that's convenient
25810 (and in mode MODE if that's convenient).
25811 SUBTARGET may be used as the target for computing one of EXP's operands.
25812 IGNORE is nonzero if the value is to be ignored. */
25815 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25816 enum machine_mode mode ATTRIBUTE_UNUSED,
25817 int ignore ATTRIBUTE_UNUSED)
25819 const struct builtin_description *d;
25821 enum insn_code icode;
25822 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25823 tree arg0, arg1, arg2;
25824 rtx op0, op1, op2, pat;
25825 enum machine_mode mode0, mode1, mode2;
25826 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25828 /* Determine whether the builtin function is available under the current ISA.
25829 Originally the builtin was not created if it wasn't applicable to the
25830 current ISA based on the command line switches. With function specific
25831 options, we need to check in the context of the function making the call
25832 whether it is supported. */
25833 if (ix86_builtins_isa[fcode].isa
25834 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25836 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25837 NULL, NULL, false);
25840 error ("%qE needs unknown isa option", fndecl);
25843 gcc_assert (opts != NULL);
25844 error ("%qE needs isa option %s", fndecl, opts);
25852 case IX86_BUILTIN_MASKMOVQ:
25853 case IX86_BUILTIN_MASKMOVDQU:
25854 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25855 ? CODE_FOR_mmx_maskmovq
25856 : CODE_FOR_sse2_maskmovdqu);
25857 /* Note the arg order is different from the operand order. */
25858 arg1 = CALL_EXPR_ARG (exp, 0);
25859 arg2 = CALL_EXPR_ARG (exp, 1);
25860 arg0 = CALL_EXPR_ARG (exp, 2);
25861 op0 = expand_normal (arg0);
25862 op1 = expand_normal (arg1);
25863 op2 = expand_normal (arg2);
25864 mode0 = insn_data[icode].operand[0].mode;
25865 mode1 = insn_data[icode].operand[1].mode;
25866 mode2 = insn_data[icode].operand[2].mode;
25868 op0 = force_reg (Pmode, op0);
25869 op0 = gen_rtx_MEM (mode1, op0);
25871 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25872 op0 = copy_to_mode_reg (mode0, op0);
25873 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25874 op1 = copy_to_mode_reg (mode1, op1);
25875 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25876 op2 = copy_to_mode_reg (mode2, op2);
25877 pat = GEN_FCN (icode) (op0, op1, op2);
25883 case IX86_BUILTIN_LDMXCSR:
25884 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25885 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25886 emit_move_insn (target, op0);
25887 emit_insn (gen_sse_ldmxcsr (target));
25890 case IX86_BUILTIN_STMXCSR:
25891 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25892 emit_insn (gen_sse_stmxcsr (target));
25893 return copy_to_mode_reg (SImode, target);
25895 case IX86_BUILTIN_CLFLUSH:
25896 arg0 = CALL_EXPR_ARG (exp, 0);
25897 op0 = expand_normal (arg0);
25898 icode = CODE_FOR_sse2_clflush;
25899 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25900 op0 = copy_to_mode_reg (Pmode, op0);
25902 emit_insn (gen_sse2_clflush (op0));
25905 case IX86_BUILTIN_MONITOR:
25906 arg0 = CALL_EXPR_ARG (exp, 0);
25907 arg1 = CALL_EXPR_ARG (exp, 1);
25908 arg2 = CALL_EXPR_ARG (exp, 2);
25909 op0 = expand_normal (arg0);
25910 op1 = expand_normal (arg1);
25911 op2 = expand_normal (arg2);
25913 op0 = copy_to_mode_reg (Pmode, op0);
25915 op1 = copy_to_mode_reg (SImode, op1);
25917 op2 = copy_to_mode_reg (SImode, op2);
25918 emit_insn (ix86_gen_monitor (op0, op1, op2));
25921 case IX86_BUILTIN_MWAIT:
25922 arg0 = CALL_EXPR_ARG (exp, 0);
25923 arg1 = CALL_EXPR_ARG (exp, 1);
25924 op0 = expand_normal (arg0);
25925 op1 = expand_normal (arg1);
25927 op0 = copy_to_mode_reg (SImode, op0);
25929 op1 = copy_to_mode_reg (SImode, op1);
25930 emit_insn (gen_sse3_mwait (op0, op1));
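/* Illustrative sketch (not part of the build): the SSE3 intrinsics

     #include <pmmintrin.h>
     void wait_on (void const *p)
     {
       _mm_monitor (p, 0, 0);
       _mm_mwait (0, 0);
     }

   land in the two cases above: _mm_monitor takes an address plus
   extension and hint words, _mm_mwait just the latter two; the
   address is forced into a Pmode register and the remaining
   operands into SImode registers.  */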
25933 case IX86_BUILTIN_VEC_INIT_V2SI:
25934 case IX86_BUILTIN_VEC_INIT_V4HI:
25935 case IX86_BUILTIN_VEC_INIT_V8QI:
25936 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25938 case IX86_BUILTIN_VEC_EXT_V2DF:
25939 case IX86_BUILTIN_VEC_EXT_V2DI:
25940 case IX86_BUILTIN_VEC_EXT_V4SF:
25941 case IX86_BUILTIN_VEC_EXT_V4SI:
25942 case IX86_BUILTIN_VEC_EXT_V8HI:
25943 case IX86_BUILTIN_VEC_EXT_V2SI:
25944 case IX86_BUILTIN_VEC_EXT_V4HI:
25945 case IX86_BUILTIN_VEC_EXT_V16QI:
25946 return ix86_expand_vec_ext_builtin (exp, target);
25948 case IX86_BUILTIN_VEC_SET_V2DI:
25949 case IX86_BUILTIN_VEC_SET_V4SF:
25950 case IX86_BUILTIN_VEC_SET_V4SI:
25951 case IX86_BUILTIN_VEC_SET_V8HI:
25952 case IX86_BUILTIN_VEC_SET_V4HI:
25953 case IX86_BUILTIN_VEC_SET_V16QI:
25954 return ix86_expand_vec_set_builtin (exp);
25956 case IX86_BUILTIN_VEC_PERM_V2DF:
25957 case IX86_BUILTIN_VEC_PERM_V4SF:
25958 case IX86_BUILTIN_VEC_PERM_V2DI:
25959 case IX86_BUILTIN_VEC_PERM_V4SI:
25960 case IX86_BUILTIN_VEC_PERM_V8HI:
25961 case IX86_BUILTIN_VEC_PERM_V16QI:
25962 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25963 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25964 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25965 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25966 case IX86_BUILTIN_VEC_PERM_V4DF:
25967 case IX86_BUILTIN_VEC_PERM_V8SF:
25968 return ix86_expand_vec_perm_builtin (exp);
25970 case IX86_BUILTIN_INFQ:
25971 case IX86_BUILTIN_HUGE_VALQ:
25973 REAL_VALUE_TYPE inf;
25977 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25979 tmp = validize_mem (force_const_mem (mode, tmp));
25982 target = gen_reg_rtx (mode);
25984 emit_move_insn (target, tmp);
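/* Illustrative sketch (not part of the build): __float128 infinity
   is materialized from the constant pool rather than computed, so

     __float128 inf (void) { return __builtin_infq (); }

   expands to a simple load of a CONST_DOUBLE from memory, as set up
   above.  */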
25988 case IX86_BUILTIN_LLWPCB:
25989 arg0 = CALL_EXPR_ARG (exp, 0);
25990 op0 = expand_normal (arg0);
25991 icode = CODE_FOR_lwp_llwpcb;
25992 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25993 op0 = copy_to_mode_reg (Pmode, op0);
25994 emit_insn (gen_lwp_llwpcb (op0));
25997 case IX86_BUILTIN_SLWPCB:
25998 icode = CODE_FOR_lwp_slwpcb;
26000 || !insn_data[icode].operand[0].predicate (target, Pmode))
26001 target = gen_reg_rtx (Pmode);
26002 emit_insn (gen_lwp_slwpcb (target));
26009 for (i = 0, d = bdesc_special_args;
26010 i < ARRAY_SIZE (bdesc_special_args);
26012 if (d->code == fcode)
26013 return ix86_expand_special_args_builtin (d, exp, target);
26015 for (i = 0, d = bdesc_args;
26016 i < ARRAY_SIZE (bdesc_args);
26018 if (d->code == fcode)
26021 case IX86_BUILTIN_FABSQ:
26022 case IX86_BUILTIN_COPYSIGNQ:
26024 /* Emit a normal call if SSE2 isn't available. */
26025 return expand_call (exp, target, ignore);
26027 return ix86_expand_args_builtin (d, exp, target);
26030 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26031 if (d->code == fcode)
26032 return ix86_expand_sse_comi (d, exp, target);
26034 for (i = 0, d = bdesc_pcmpestr;
26035 i < ARRAY_SIZE (bdesc_pcmpestr);
26037 if (d->code == fcode)
26038 return ix86_expand_sse_pcmpestr (d, exp, target);
26040 for (i = 0, d = bdesc_pcmpistr;
26041 i < ARRAY_SIZE (bdesc_pcmpistr);
26043 if (d->code == fcode)
26044 return ix86_expand_sse_pcmpistr (d, exp, target);
26046 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26047 if (d->code == fcode)
26048 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
26049 (enum ix86_builtin_func_type)
26050 d->flag, d->comparison);
26052 gcc_unreachable ();
26055 /* Returns a function decl for a vectorized version of the builtin function
26056 with builtin function code FN and the result vector type TYPE, or NULL_TREE
26057 if it is not available. */
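/* Illustrative sketch (not part of the build): when the vectorizer
   processes

     void f (double *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = __builtin_sqrt (a[i]);
     }

   it queries this hook with fn = BUILT_IN_SQRT and a V2DF result
   type, and gets back the decl of IX86_BUILTIN_SQRTPD (sqrtpd).  */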
26060 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
26063 enum machine_mode in_mode, out_mode;
26065 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26067 if (TREE_CODE (type_out) != VECTOR_TYPE
26068 || TREE_CODE (type_in) != VECTOR_TYPE
26069 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
26072 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26073 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26074 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26075 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26079 case BUILT_IN_SQRT:
26080 if (out_mode == DFmode && in_mode == DFmode)
26082 if (out_n == 2 && in_n == 2)
26083 return ix86_builtins[IX86_BUILTIN_SQRTPD];
26084 else if (out_n == 4 && in_n == 4)
26085 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
26089 case BUILT_IN_SQRTF:
26090 if (out_mode == SFmode && in_mode == SFmode)
26092 if (out_n == 4 && in_n == 4)
26093 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
26094 else if (out_n == 8 && in_n == 8)
26095 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
26099 case BUILT_IN_LRINT:
26100 if (out_mode == SImode && out_n == 4
26101 && in_mode == DFmode && in_n == 2)
26102 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
26105 case BUILT_IN_LRINTF:
26106 if (out_mode == SImode && in_mode == SFmode)
26108 if (out_n == 4 && in_n == 4)
26109 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26110 else if (out_n == 8 && in_n == 8)
26111 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
26115 case BUILT_IN_COPYSIGN:
26116 if (out_mode == DFmode && in_mode == DFmode)
26118 if (out_n == 2 && in_n == 2)
26119 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26120 else if (out_n == 4 && in_n == 4)
26121 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
26125 case BUILT_IN_COPYSIGNF:
26126 if (out_mode == SFmode && in_mode == SFmode)
26128 if (out_n == 4 && in_n == 4)
26129 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26130 else if (out_n == 8 && in_n == 8)
26131 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
26139 /* Dispatch to a handler for a vectorization library. */
26140 if (ix86_veclib_handler)
26141 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26147 /* Handler for an SVML-style interface to
26148 a library with vectorized intrinsics. */
26151 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26154 tree fntype, new_fndecl, args;
26157 enum machine_mode el_mode, in_mode;
26160 /* SVML is suitable only for unsafe math. */
26161 if (!flag_unsafe_math_optimizations)
26164 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26165 n = TYPE_VECTOR_SUBPARTS (type_out);
26166 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26167 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26168 if (el_mode != in_mode
26176 case BUILT_IN_LOG10:
26178 case BUILT_IN_TANH:
26180 case BUILT_IN_ATAN:
26181 case BUILT_IN_ATAN2:
26182 case BUILT_IN_ATANH:
26183 case BUILT_IN_CBRT:
26184 case BUILT_IN_SINH:
26186 case BUILT_IN_ASINH:
26187 case BUILT_IN_ASIN:
26188 case BUILT_IN_COSH:
26190 case BUILT_IN_ACOSH:
26191 case BUILT_IN_ACOS:
26192 if (el_mode != DFmode || n != 2)
26196 case BUILT_IN_EXPF:
26197 case BUILT_IN_LOGF:
26198 case BUILT_IN_LOG10F:
26199 case BUILT_IN_POWF:
26200 case BUILT_IN_TANHF:
26201 case BUILT_IN_TANF:
26202 case BUILT_IN_ATANF:
26203 case BUILT_IN_ATAN2F:
26204 case BUILT_IN_ATANHF:
26205 case BUILT_IN_CBRTF:
26206 case BUILT_IN_SINHF:
26207 case BUILT_IN_SINF:
26208 case BUILT_IN_ASINHF:
26209 case BUILT_IN_ASINF:
26210 case BUILT_IN_COSHF:
26211 case BUILT_IN_COSF:
26212 case BUILT_IN_ACOSHF:
26213 case BUILT_IN_ACOSF:
26214 if (el_mode != SFmode || n != 4)
26222 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26224 if (fn == BUILT_IN_LOGF)
26225 strcpy (name, "vmlsLn4");
26226 else if (fn == BUILT_IN_LOG)
26227 strcpy (name, "vmldLn2");
26230 sprintf (name, "vmls%s", bname+10);
26231 name[strlen (name)-1] = '4';
26234 sprintf (name, "vmld%s2", bname+10);
26236 /* Convert to uppercase. */
26240 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26241 args = TREE_CHAIN (args))
26245 fntype = build_function_type_list (type_out, type_in, NULL);
26247 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26249 /* Build a function declaration for the vectorized function. */
26250 new_fndecl = build_decl (BUILTINS_LOCATION,
26251 FUNCTION_DECL, get_identifier (name), fntype);
26252 TREE_PUBLIC (new_fndecl) = 1;
26253 DECL_EXTERNAL (new_fndecl) = 1;
26254 DECL_IS_NOVOPS (new_fndecl) = 1;
26255 TREE_READONLY (new_fndecl) = 1;
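/* Illustrative sketch, assuming the usual SVML naming scheme and the
   (elided) uppercase fixup: the mangling above turns "__builtin_sinf"
   into "vmlsSin4" (float, 4 lanes) and "__builtin_sin" into
   "vmldSin2" (double, 2 lanes), with log/logf special-cased to
   "vmldLn2"/"vmlsLn4" as shown.  */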
26260 /* Handler for an ACML-style interface to
26261 a library with vectorized intrinsics. */
26264 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26266 char name[20] = "__vr.._";
26267 tree fntype, new_fndecl, args;
26270 enum machine_mode el_mode, in_mode;
26273 /* The ACML is 64-bit only, and suitable only for unsafe math, as
26274 it does not correctly implement parts of IEEE arithmetic, such as
26275 denormals, to the required precision. */
26277 || !flag_unsafe_math_optimizations)
26280 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26281 n = TYPE_VECTOR_SUBPARTS (type_out);
26282 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26283 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26284 if (el_mode != in_mode
26294 case BUILT_IN_LOG2:
26295 case BUILT_IN_LOG10:
26298 if (el_mode != DFmode
26303 case BUILT_IN_SINF:
26304 case BUILT_IN_COSF:
26305 case BUILT_IN_EXPF:
26306 case BUILT_IN_POWF:
26307 case BUILT_IN_LOGF:
26308 case BUILT_IN_LOG2F:
26309 case BUILT_IN_LOG10F:
26312 if (el_mode != SFmode
26321 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26322 sprintf (name + 7, "%s", bname+10);
26325 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26326 args = TREE_CHAIN (args))
26330 fntype = build_function_type_list (type_out, type_in, NULL);
26332 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26334 /* Build a function declaration for the vectorized function. */
26335 new_fndecl = build_decl (BUILTINS_LOCATION,
26336 FUNCTION_DECL, get_identifier (name), fntype);
26337 TREE_PUBLIC (new_fndecl) = 1;
26338 DECL_EXTERNAL (new_fndecl) = 1;
26339 DECL_IS_NOVOPS (new_fndecl) = 1;
26340 TREE_READONLY (new_fndecl) = 1;
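/* Illustrative sketch, assuming the usual ACML naming scheme: the
   "__vr.._" template has its two dots filled in (elided above) so
   that "__builtin_sin" becomes "__vrd2_sin" and "__builtin_sinf"
   becomes "__vrs4_sinf".  */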
26346 /* Returns a decl of a function that implements conversion of an integer vector
26347 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26348 are the types involved when converting according to CODE.
26349 Return NULL_TREE if it is not available. */
26352 ix86_vectorize_builtin_conversion (unsigned int code,
26353 tree dest_type, tree src_type)
26361 switch (TYPE_MODE (src_type))
26364 switch (TYPE_MODE (dest_type))
26367 return (TYPE_UNSIGNED (src_type)
26368 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26369 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26371 return (TYPE_UNSIGNED (src_type)
26373 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26379 switch (TYPE_MODE (dest_type))
26382 return (TYPE_UNSIGNED (src_type)
26384 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
26393 case FIX_TRUNC_EXPR:
26394 switch (TYPE_MODE (dest_type))
26397 switch (TYPE_MODE (src_type))
26400 return (TYPE_UNSIGNED (dest_type)
26402 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26404 return (TYPE_UNSIGNED (dest_type)
26406 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26413 switch (TYPE_MODE (src_type))
26416 return (TYPE_UNSIGNED (dest_type)
26418 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26435 /* Returns a code for a target-specific builtin that implements
26436 the reciprocal of the function, or NULL_TREE if not available. */
26439 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26440 bool sqrt ATTRIBUTE_UNUSED)
26442 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26443 && flag_finite_math_only && !flag_trapping_math
26444 && flag_unsafe_math_optimizations))
26448 /* Machine dependent builtins. */
26451 /* Vectorized version of sqrt to rsqrt conversion. */
26452 case IX86_BUILTIN_SQRTPS_NR:
26453 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26455 case IX86_BUILTIN_SQRTPS_NR256:
26456 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
26462 /* Normal builtins. */
26465 /* Sqrt to rsqrt conversion. */
26466 case BUILT_IN_SQRTF:
26467 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26474 /* Helper for avx_vpermilps256_operand et al. This is also used by
26475 the expansion functions to turn the parallel back into a mask.
26476 The return value is 0 for no match and the imm8+1 for a match. */
26479 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
26481 unsigned i, nelt = GET_MODE_NUNITS (mode);
26483 unsigned char ipar[8];
26485 if (XVECLEN (par, 0) != (int) nelt)
26488 /* Validate that all of the elements are constants, and not totally
26489 out of range. Copy the data into an integral array to make the
26490 subsequent checks easier. */
26491 for (i = 0; i < nelt; ++i)
26493 rtx er = XVECEXP (par, 0, i);
26494 unsigned HOST_WIDE_INT ei;
26496 if (!CONST_INT_P (er))
26507 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
26509 for (i = 0; i < 2; ++i)
26513 mask |= ipar[i] << i;
26515 for (i = 2; i < 4; ++i)
26519 mask |= (ipar[i] - 2) << i;
26524 /* In the 256-bit SFmode case, we have full freedom of movement
26525 within the low 128-bit lane, but the high 128-bit lane must
26526 mirror the exact same pattern. */
26527 for (i = 0; i < 4; ++i)
26528 if (ipar[i] + 4 != ipar[i + 4])
26535 /* In the 128-bit case, we have full freedom in the placement of
26536 the elements from the source operand. */
26537 for (i = 0; i < nelt; ++i)
26538 mask |= ipar[i] << (i * (nelt / 2));
26542 gcc_unreachable ();
26545 /* Make sure success has a non-zero value by adding one. */
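/* Worked example (illustrative): for a V4SF parallel selecting
   elements (1, 0, 3, 2), the 128-bit case above computes

     mask = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6 = 0xb1

   and the function returns 0xb2; callers subtract one to recover
   the vpermilps imm8.  */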
26549 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26550 the expansion functions to turn the parallel back into a mask.
26551 The return value is 0 for no match and the imm8+1 for a match. */
26554 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26556 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26558 unsigned char ipar[8];
26560 if (XVECLEN (par, 0) != (int) nelt)
26563 /* Validate that all of the elements are constants, and not totally
26564 out of range. Copy the data into an integral array to make the
26565 subsequent checks easier. */
26566 for (i = 0; i < nelt; ++i)
26568 rtx er = XVECEXP (par, 0, i);
26569 unsigned HOST_WIDE_INT ei;
26571 if (!CONST_INT_P (er))
26574 if (ei >= 2 * nelt)
26579 /* Validate that each half of the permute selects consecutive elements, i.e. forms one whole half of a source. */
26580 for (i = 0; i < nelt2 - 1; ++i)
26581 if (ipar[i] + 1 != ipar[i + 1])
26583 for (i = nelt2; i < nelt - 1; ++i)
26584 if (ipar[i] + 1 != ipar[i + 1])
26587 /* Reconstruct the mask. */
26588 for (i = 0; i < 2; ++i)
26590 unsigned e = ipar[i * nelt2];
26594 mask |= e << (i * 4);
26597 /* Make sure success has a non-zero value by adding one. */
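/* Worked example (illustrative, assuming the elided encoding step
   divides each half's first index by NELT2): a V4DF parallel
   (2, 3, 0, 1), i.e. a swap of the two 128-bit lanes of one source,
   yields mask = 1 << 0 | 0 << 4 = 0x01, and the function returns
   0x02.  */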
26602 /* Store OPERAND to the memory after reload is completed. This means
26603 that we can't easily use assign_stack_local. */
26605 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26609 gcc_assert (reload_completed);
26610 if (ix86_using_red_zone ())
26612 result = gen_rtx_MEM (mode,
26613 gen_rtx_PLUS (Pmode,
26615 GEN_INT (-RED_ZONE_SIZE)));
26616 emit_move_insn (result, operand);
26618 else if (TARGET_64BIT)
26624 operand = gen_lowpart (DImode, operand);
26628 gen_rtx_SET (VOIDmode,
26629 gen_rtx_MEM (DImode,
26630 gen_rtx_PRE_DEC (DImode,
26631 stack_pointer_rtx)),
26635 gcc_unreachable ();
26637 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26646 split_double_mode (mode, &operand, 1, operands, operands + 1);
26648 gen_rtx_SET (VOIDmode,
26649 gen_rtx_MEM (SImode,
26650 gen_rtx_PRE_DEC (Pmode,
26651 stack_pointer_rtx)),
26654 gen_rtx_SET (VOIDmode,
26655 gen_rtx_MEM (SImode,
26656 gen_rtx_PRE_DEC (Pmode,
26657 stack_pointer_rtx)),
26662 /* Store HImodes as SImodes. */
26663 operand = gen_lowpart (SImode, operand);
26667 gen_rtx_SET (VOIDmode,
26668 gen_rtx_MEM (GET_MODE (operand),
26669 gen_rtx_PRE_DEC (SImode,
26670 stack_pointer_rtx)),
26674 gcc_unreachable ();
26676 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26681 /* Free operand from the memory. */
26683 ix86_free_from_memory (enum machine_mode mode)
26685 if (!ix86_using_red_zone ())
26689 if (mode == DImode || TARGET_64BIT)
26693 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26694 to a pop or add instruction if registers are available. */
26695 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26696 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26701 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26702 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26704 static const reg_class_t *
26705 i386_ira_cover_classes (void)
26707 static const reg_class_t sse_fpmath_classes[] = {
26708 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26710 static const reg_class_t no_sse_fpmath_classes[] = {
26711 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26714 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26717 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
26719 Put float CONST_DOUBLE in the constant pool instead of fp regs.
26720 QImode must go into class Q_REGS.
26721 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26722 movdf to do mem-to-mem moves through integer regs. */
26725 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
26727 enum machine_mode mode = GET_MODE (x);
26729 /* We're only allowed to return a subclass of CLASS. Many of the
26730 following checks fail for NO_REGS, so eliminate that early. */
26731 if (regclass == NO_REGS)
26734 /* All classes can load zeros. */
26735 if (x == CONST0_RTX (mode))
26738 /* Force constants into memory if we are loading a (nonzero) constant into
26739 an MMX or SSE register. This is because there are no MMX/SSE instructions
26740 to load from a constant. */
26742 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26745 /* Prefer SSE regs only, if we can use them for math. */
26746 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26747 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26749 /* Floating-point constants need more complex checks. */
26750 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26752 /* General regs can load everything. */
26753 if (reg_class_subset_p (regclass, GENERAL_REGS))
26756 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26757 zero above. We only want to wind up preferring 80387 registers if
26758 we plan on doing computation with them. */
26760 && standard_80387_constant_p (x))
26762 /* Limit class to non-sse. */
26763 if (regclass == FLOAT_SSE_REGS)
26765 if (regclass == FP_TOP_SSE_REGS)
26767 if (regclass == FP_SECOND_SSE_REGS)
26768 return FP_SECOND_REG;
26769 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26776 /* Generally when we see PLUS here, it's the function invariant
26777 (plus soft-fp const_int). Which can only be computed into general
26779 if (GET_CODE (x) == PLUS)
26780 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26782 /* QImode constants are easy to load, but non-constant QImode data
26783 must go into Q_REGS. */
26784 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26786 if (reg_class_subset_p (regclass, Q_REGS))
26788 if (reg_class_subset_p (Q_REGS, regclass))
26796 /* Discourage putting floating-point values in SSE registers unless
26797 SSE math is being used, and likewise for the 387 registers. */
26799 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26801 enum machine_mode mode = GET_MODE (x);
26803 /* Restrict the output reload class to the register bank that we are doing
26804 math on. If we would like not to return a subset of CLASS, reject this
26805 alternative: if reload cannot do this, it will still use its choice. */
26806 mode = GET_MODE (x);
26807 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26808 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26810 if (X87_FLOAT_MODE_P (mode))
26812 if (regclass == FP_TOP_SSE_REGS)
26814 else if (regclass == FP_SECOND_SSE_REGS)
26815 return FP_SECOND_REG;
26817 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26824 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26825 enum machine_mode mode,
26826 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26828 /* QImode spills from non-QI registers require an
26829 intermediate register on 32-bit targets. */
26830 if (!in_p && mode == QImode && !TARGET_64BIT
26831 && (rclass == GENERAL_REGS
26832 || rclass == LEGACY_REGS
26833 || rclass == INDEX_REGS))
26842 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26843 regno = true_regnum (x);
26845 /* Return Q_REGS if the operand is in memory. */
26853 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26856 ix86_class_likely_spilled_p (reg_class_t rclass)
26867 case SSE_FIRST_REG:
26869 case FP_SECOND_REG:
26879 /* If we are copying between general and FP registers, we need a memory
26880 location. The same is true for SSE and MMX registers.
26882 To optimize register_move_cost performance, allow inline variant.
26884 The macro can't work reliably when one of the CLASSES is a class containing
26885 registers from multiple units (SSE, MMX, integer). We avoid this by never
26886 combining those units in a single alternative in the machine description.
26887 Ensure that this constraint holds to avoid unexpected surprises.
26889 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26890 enforce these sanity checks. */
26893 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26894 enum machine_mode mode, int strict)
26896 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26897 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26898 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26899 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26900 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26901 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26903 gcc_assert (!strict);
26907 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26910 /* ??? This is a lie. We do have moves between mmx/general and between
26911 mmx/sse2. But by saying we need secondary memory we discourage the
26912 register allocator from using the mmx registers unless needed. */
26913 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26916 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26918 /* SSE1 doesn't have any direct moves from other classes. */
26922 /* If the target says that inter-unit moves are more expensive
26923 than moving through memory, then don't generate them. */
26924 if (!TARGET_INTER_UNIT_MOVES)
26927 /* Between SSE and general, we have moves no larger than word size. */
26928 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26936 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26937 enum machine_mode mode, int strict)
26939 return inline_secondary_memory_needed (class1, class2, mode, strict);
26942 /* Return true if the registers in CLASS cannot represent the change from
26943 modes FROM to TO. */
26946 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26947 enum reg_class regclass)
26952 /* x87 registers can't do subreg at all, as all values are reformatted
26953 to extended precision. */
26954 if (MAYBE_FLOAT_CLASS_P (regclass))
26957 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26959 /* Vector registers do not support QI or HImode loads. If we don't
26960 disallow a change to these modes, reload will assume it's ok to
26961 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26962 the vec_dupv4hi pattern. */
26963 if (GET_MODE_SIZE (from) < 4)
26966 /* Vector registers do not support subreg with nonzero offsets, which
26967 are otherwise valid for integer registers. Since we can't see
26968 whether we have a nonzero offset from here, prohibit all
26969 nonparadoxical subregs changing size. */
26970 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26977 /* Return the cost of moving data of mode M between a
26978 register and memory. A value of 2 is the default; this cost is
26979 relative to those in `REGISTER_MOVE_COST'.
26981 This function is used extensively by register_move_cost, which is used to
26982 build tables at startup. Make it inline in this case.
26983 When IN is 2, return the maximum of the in and out move costs.
26985 If moving between registers and memory is more expensive than
26986 between two registers, you should define this macro to express the
26989 relative cost. Also model the increased cost of moving QImode registers in non Q_REGS classes.
26993 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26997 if (FLOAT_CLASS_P (regclass))
27015 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
27016 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
27018 if (SSE_CLASS_P (regclass))
27021 switch (GET_MODE_SIZE (mode))
27036 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
27037 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
27039 if (MMX_CLASS_P (regclass))
27042 switch (GET_MODE_SIZE (mode))
27054 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
27055 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
27057 switch (GET_MODE_SIZE (mode))
27060 if (Q_CLASS_P (regclass) || TARGET_64BIT)
27063 return ix86_cost->int_store[0];
27064 if (TARGET_PARTIAL_REG_DEPENDENCY
27065 && optimize_function_for_speed_p (cfun))
27066 cost = ix86_cost->movzbl_load;
27068 cost = ix86_cost->int_load[0];
27070 return MAX (cost, ix86_cost->int_store[0]);
27076 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
27078 return ix86_cost->movzbl_load;
27080 return ix86_cost->int_store[0] + 4;
27085 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
27086 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
27088 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
27089 if (mode == TFmode)
27092 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
27094 cost = ix86_cost->int_load[2];
27096 cost = ix86_cost->int_store[2];
27097 return (cost * (((int) GET_MODE_SIZE (mode)
27098 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
27103 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
27106 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
27110 /* Return the cost of moving data from a register in class CLASS1 to
27111 one in class CLASS2.
27113 It is not required that the cost always equal 2 when FROM is the same as TO;
27114 on some machines it is expensive to move between registers if they are not
27115 general registers. */
27118 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
27119 reg_class_t class2_i)
27121 enum reg_class class1 = (enum reg_class) class1_i;
27122 enum reg_class class2 = (enum reg_class) class2_i;
27124 /* If we require secondary memory, compute the cost of the store followed
27125 by the load. In order to avoid bad register allocation choices, we need
27126 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
27128 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27132 cost += inline_memory_move_cost (mode, class1, 2);
27133 cost += inline_memory_move_cost (mode, class2, 2);
27135 /* When copying from a general-purpose register we may emit multiple
27136 stores followed by a single load, causing a memory size mismatch stall.
27137 Count this as an arbitrarily high cost of 20. */
27138 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27141 /* In the case of FP/MMX moves, the registers actually overlap, and we
27142 have to switch modes in order to treat them differently. */
27143 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27144 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27150 /* Moves between SSE/MMX and integer unit are expensive. */
27151 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27152 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27154 /* ??? By keeping the returned value relatively high, we limit the number
27155 of moves between integer and MMX/SSE registers for all targets.
27156 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
27157 where integer modes in MMX/SSE registers are not tieable
27158 because of missing QImode and HImode moves to, from or between
27159 MMX/SSE registers. */
27160 return MAX (8, ix86_cost->mmxsse_to_integer);
27162 if (MAYBE_FLOAT_CLASS_P (class1))
27163 return ix86_cost->fp_move;
27164 if (MAYBE_SSE_CLASS_P (class1))
27165 return ix86_cost->sse_move;
27166 if (MAYBE_MMX_CLASS_P (class1))
27167 return ix86_cost->mmx_move;
27171 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27174 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27176 /* Flags, and only flags, can hold CCmode values. */
27177 if (CC_REGNO_P (regno))
27178 return GET_MODE_CLASS (mode) == MODE_CC;
27179 if (GET_MODE_CLASS (mode) == MODE_CC
27180 || GET_MODE_CLASS (mode) == MODE_RANDOM
27181 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27183 if (FP_REGNO_P (regno))
27184 return VALID_FP_MODE_P (mode);
27185 if (SSE_REGNO_P (regno))
27187 /* We implement the move patterns for all vector modes into and
27188 out of SSE registers, even when no operation instructions
27189 are available. OImode move is available only when AVX is
27191 return ((TARGET_AVX && mode == OImode)
27192 || VALID_AVX256_REG_MODE (mode)
27193 || VALID_SSE_REG_MODE (mode)
27194 || VALID_SSE2_REG_MODE (mode)
27195 || VALID_MMX_REG_MODE (mode)
27196 || VALID_MMX_REG_MODE_3DNOW (mode));
27198 if (MMX_REGNO_P (regno))
27200 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27201 so if the register is available at all, then we can move data of
27202 the given mode into or out of it. */
27203 return (VALID_MMX_REG_MODE (mode)
27204 || VALID_MMX_REG_MODE_3DNOW (mode));
27207 if (mode == QImode)
27209 /* Take care with QImode values - they can live in non-QI regs,
27210 but then they do cause partial register stalls. */
27211 if (regno <= BX_REG || TARGET_64BIT)
27213 if (!TARGET_PARTIAL_REG_STALL)
27215 return reload_in_progress || reload_completed;
27217 /* We handle both integer and floats in the general purpose registers. */
27218 else if (VALID_INT_MODE_P (mode))
27220 else if (VALID_FP_MODE_P (mode))
27222 else if (VALID_DFP_MODE_P (mode))
27224 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27225 on to use that value in smaller contexts, this can easily force a
27226 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27227 supporting DImode, allow it. */
27228 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27234 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27235 tieable integer mode. */
27238 ix86_tieable_integer_mode_p (enum machine_mode mode)
27247 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27250 return TARGET_64BIT;
27257 /* Return true if MODE1 is accessible in a register that can hold MODE2
27258 without copying. That is, all register classes that can hold MODE2
27259 can also hold MODE1. */
27262 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27264 if (mode1 == mode2)
27267 if (ix86_tieable_integer_mode_p (mode1)
27268 && ix86_tieable_integer_mode_p (mode2))
27271 /* MODE2 being XFmode implies fp stack or general regs, which means we
27272 can tie any smaller floating point modes to it. Note that we do not
27273 tie this with TFmode. */
27274 if (mode2 == XFmode)
27275 return mode1 == SFmode || mode1 == DFmode;
27277 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27278 that we can tie it with SFmode. */
27279 if (mode2 == DFmode)
27280 return mode1 == SFmode;
27282 /* If MODE2 is only appropriate for an SSE register, then tie with
27283 any other mode acceptable to SSE registers. */
27284 if (GET_MODE_SIZE (mode2) == 16
27285 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27286 return (GET_MODE_SIZE (mode1) == 16
27287 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27289 /* If MODE2 is appropriate for an MMX register, then tie
27290 with any other mode acceptable to MMX registers. */
27291 if (GET_MODE_SIZE (mode2) == 8
27292 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27293 return (GET_MODE_SIZE (mode1) == 8
27294 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
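/* Illustrative consequences of the rules above: SFmode ties with
   XFmode and with DFmode; V4SF ties with V2DF (both 16-byte,
   SSE-only); V2SF ties with DImode (both 8-byte, MMX-capable); but
   TFmode does not tie with XFmode.  */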
27299 /* Compute a (partial) cost for rtx X. Return true if the complete
27300 cost has been computed, and false if subexpressions should be
27301 scanned. In either case, *TOTAL contains the cost result. */
27304 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27306 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27307 enum machine_mode mode = GET_MODE (x);
27308 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27316 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27318 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
else if (flag_pic && SYMBOLIC_CONST (x)
&& (!TARGET_64BIT
|| (GET_CODE (x) != LABEL_REF
&& (GET_CODE (x) != SYMBOL_REF
|| !SYMBOL_REF_LOCAL_P (x)))))
27331 if (mode == VOIDmode)
27334 switch (standard_80387_constant_p (x))
27339 default: /* Other constants */
27344 /* Start with (MEM (SYMBOL_REF)), since that's where
27345 it'll probably end up. Add a penalty for size. */
27346 *total = (COSTS_N_INSNS (1)
27347 + (flag_pic != 0 && !TARGET_64BIT)
27348 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* The zero extension is often completely free on x86_64, so make
it as cheap as possible.  */
27356 if (TARGET_64BIT && mode == DImode
27357 && GET_MODE (XEXP (x, 0)) == SImode)
27359 else if (TARGET_ZERO_EXTEND_WITH_AND)
27360 *total = cost->add;
27362 *total = cost->movzx;
27366 *total = cost->movsx;
27370 if (CONST_INT_P (XEXP (x, 1))
27371 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27373 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27376 *total = cost->add;
27379 if ((value == 2 || value == 3)
27380 && cost->lea <= cost->shift_const)
27382 *total = cost->lea;
27392 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27394 if (CONST_INT_P (XEXP (x, 1)))
27396 if (INTVAL (XEXP (x, 1)) > 32)
27397 *total = cost->shift_const + COSTS_N_INSNS (2);
27399 *total = cost->shift_const * 2;
27403 if (GET_CODE (XEXP (x, 1)) == AND)
27404 *total = cost->shift_var * 2;
27406 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27411 if (CONST_INT_P (XEXP (x, 1)))
27412 *total = cost->shift_const;
27414 *total = cost->shift_var;
27419 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27421 /* ??? SSE scalar cost should be used here. */
27422 *total = cost->fmul;
27425 else if (X87_FLOAT_MODE_P (mode))
27427 *total = cost->fmul;
27430 else if (FLOAT_MODE_P (mode))
27432 /* ??? SSE vector cost should be used here. */
27433 *total = cost->fmul;
27438 rtx op0 = XEXP (x, 0);
27439 rtx op1 = XEXP (x, 1);
27441 if (CONST_INT_P (XEXP (x, 1)))
27443 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27444 for (nbits = 0; value != 0; value &= value - 1)
27448 /* This is arbitrary. */
27451 /* Compute costs correctly for widening multiplication. */
27452 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27453 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27454 == GET_MODE_SIZE (mode))
27456 int is_mulwiden = 0;
27457 enum machine_mode inner_mode = GET_MODE (op0);
27459 if (GET_CODE (op0) == GET_CODE (op1))
27460 is_mulwiden = 1, op1 = XEXP (op1, 0);
27461 else if (CONST_INT_P (op1))
27463 if (GET_CODE (op0) == SIGN_EXTEND)
27464 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27467 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27471 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27474 *total = (cost->mult_init[MODE_INDEX (mode)]
27475 + nbits * cost->mult_bit
27476 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27485 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27486 /* ??? SSE cost should be used here. */
27487 *total = cost->fdiv;
27488 else if (X87_FLOAT_MODE_P (mode))
27489 *total = cost->fdiv;
27490 else if (FLOAT_MODE_P (mode))
27491 /* ??? SSE vector cost should be used here. */
27492 *total = cost->fdiv;
27494 *total = cost->divide[MODE_INDEX (mode)];
27498 if (GET_MODE_CLASS (mode) == MODE_INT
27499 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27501 if (GET_CODE (XEXP (x, 0)) == PLUS
27502 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27503 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27504 && CONSTANT_P (XEXP (x, 1)))
27506 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27507 if (val == 2 || val == 4 || val == 8)
27509 *total = cost->lea;
27510 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27511 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27512 outer_code, speed);
27513 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27517 else if (GET_CODE (XEXP (x, 0)) == MULT
27518 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27520 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27521 if (val == 2 || val == 4 || val == 8)
27523 *total = cost->lea;
27524 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27525 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27529 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27531 *total = cost->lea;
27532 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27533 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27534 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27541 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27543 /* ??? SSE cost should be used here. */
27544 *total = cost->fadd;
27547 else if (X87_FLOAT_MODE_P (mode))
27549 *total = cost->fadd;
27552 else if (FLOAT_MODE_P (mode))
27554 /* ??? SSE vector cost should be used here. */
27555 *total = cost->fadd;
27563 if (!TARGET_64BIT && mode == DImode)
27565 *total = (cost->add * 2
27566 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27567 << (GET_MODE (XEXP (x, 0)) != DImode))
27568 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27569 << (GET_MODE (XEXP (x, 1)) != DImode)));
27575 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27577 /* ??? SSE cost should be used here. */
27578 *total = cost->fchs;
27581 else if (X87_FLOAT_MODE_P (mode))
27583 *total = cost->fchs;
27586 else if (FLOAT_MODE_P (mode))
27588 /* ??? SSE vector cost should be used here. */
27589 *total = cost->fchs;
27595 if (!TARGET_64BIT && mode == DImode)
27596 *total = cost->add * 2;
27598 *total = cost->add;
27602 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27603 && XEXP (XEXP (x, 0), 1) == const1_rtx
27604 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27605 && XEXP (x, 1) == const0_rtx)
27607 /* This kind of construct is implemented using test[bwl].
27608 Treat it as if we had an AND. */
27609 *total = (cost->add
27610 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27611 + rtx_cost (const1_rtx, outer_code, speed));
27617 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27622 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27623 /* ??? SSE cost should be used here. */
27624 *total = cost->fabs;
27625 else if (X87_FLOAT_MODE_P (mode))
27626 *total = cost->fabs;
27627 else if (FLOAT_MODE_P (mode))
27628 /* ??? SSE vector cost should be used here. */
27629 *total = cost->fabs;
27633 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27634 /* ??? SSE cost should be used here. */
27635 *total = cost->fsqrt;
27636 else if (X87_FLOAT_MODE_P (mode))
27637 *total = cost->fsqrt;
27638 else if (FLOAT_MODE_P (mode))
27639 /* ??? SSE vector cost should be used here. */
27640 *total = cost->fsqrt;
27644 if (XINT (x, 1) == UNSPEC_TP)
27651 case VEC_DUPLICATE:
27652 /* ??? Assume all of these vector manipulation patterns are
recognizable.  In which case they all pretty much have the
same cost.  */
27655 *total = COSTS_N_INSNS (1);
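/* As a concrete example of the above: the address-style expression
(plus (mult (reg) (const_int 4)) (reg)) falls into the PLUS case and
is costed as a single lea plus the costs of its register operands,
rather than as a multiply followed by an add.  */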
27665 static int current_machopic_label_num;
27667 /* Given a symbol name and its associated stub, write out the
27668 definition of the stub. */
27671 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27673 unsigned int length;
27674 char *binder_name, *symbol_name, lazy_ptr_name[32];
27675 int label = ++current_machopic_label_num;
27677 /* For 64-bit we shouldn't get here. */
27678 gcc_assert (!TARGET_64BIT);
27680 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27681 symb = targetm.strip_name_encoding (symb);
27683 length = strlen (stub);
27684 binder_name = XALLOCAVEC (char, length + 32);
27685 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27687 length = strlen (symb);
27688 symbol_name = XALLOCAVEC (char, length + 32);
27689 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27691 sprintf (lazy_ptr_name, "L%d$lz", label);
27694 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27696 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27698 fprintf (file, "%s:\n", stub);
27699 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27703 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27704 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27705 fprintf (file, "\tjmp\t*%%edx\n");
27708 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27710 fprintf (file, "%s:\n", binder_name);
27714 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27715 fputs ("\tpushl\t%eax\n", file);
27718 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27720 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27722 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27723 fprintf (file, "%s:\n", lazy_ptr_name);
27724 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27725 fprintf (file, ASM_LONG "%s\n", binder_name);
27727 #endif /* TARGET_MACHO */
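/* For reference, in the MACHOPIC_PURE case the stub emitted above has
roughly this shape (the label number is illustrative):

"stub":
.indirect_symbol "symbol"
call LPC$1
LPC$1: popl %eax
movl L1$lz-LPC$1(%eax),%edx
jmp *%edx

The lazy pointer L1$lz initially holds the address of the binder, so
the first call lands in dyld_stub_binding_helper, which resolves the
symbol and rewrites L1$lz to point at it.  */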
/* Order the registers for the register allocator.  */
27732 x86_order_regs_for_local_alloc (void)
27737 /* First allocate the local general purpose registers. */
27738 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27739 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27740 reg_alloc_order [pos++] = i;
27742 /* Global general purpose registers. */
27743 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27744 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27745 reg_alloc_order [pos++] = i;
/* x87 registers come first in case we are doing FP math
using them.  */
27749 if (!TARGET_SSE_MATH)
27750 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27751 reg_alloc_order [pos++] = i;
27753 /* SSE registers. */
27754 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27755 reg_alloc_order [pos++] = i;
27756 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27757 reg_alloc_order [pos++] = i;
27759 /* x87 registers. */
27760 if (TARGET_SSE_MATH)
27761 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27762 reg_alloc_order [pos++] = i;
27764 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27765 reg_alloc_order [pos++] = i;
/* Initialize the rest of the array, as we do not allocate some registers
at all.  */
27769 while (pos < FIRST_PSEUDO_REGISTER)
27770 reg_alloc_order [pos++] = 0;
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
struct attribute_spec.handler.  */
27776 ix86_handle_abi_attribute (tree *node, tree name,
27777 tree args ATTRIBUTE_UNUSED,
27778 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27780 if (TREE_CODE (*node) != FUNCTION_TYPE
27781 && TREE_CODE (*node) != METHOD_TYPE
27782 && TREE_CODE (*node) != FIELD_DECL
27783 && TREE_CODE (*node) != TYPE_DECL)
27785 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27787 *no_add_attrs = true;
27792 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27794 *no_add_attrs = true;
/* The ms_abi and sysv_abi attributes are mutually exclusive.  */
27799 if (is_attribute_p ("ms_abi", name))
27801 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27803 error ("ms_abi and sysv_abi attributes are not compatible");
27808 else if (is_attribute_p ("sysv_abi", name))
27810 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27812 error ("ms_abi and sysv_abi attributes are not compatible");
27821 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27822 struct attribute_spec.handler. */
27824 ix86_handle_struct_attribute (tree *node, tree name,
27825 tree args ATTRIBUTE_UNUSED,
27826 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27829 if (DECL_P (*node))
27831 if (TREE_CODE (*node) == TYPE_DECL)
27832 type = &TREE_TYPE (*node);
27837 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27838 || TREE_CODE (*type) == UNION_TYPE)))
27840 warning (OPT_Wattributes, "%qE attribute ignored",
27842 *no_add_attrs = true;
27845 else if ((is_attribute_p ("ms_struct", name)
27846 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27847 || ((is_attribute_p ("gcc_struct", name)
27848 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27850 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27852 *no_add_attrs = true;
27859 ix86_handle_fndecl_attribute (tree *node, tree name,
27860 tree args ATTRIBUTE_UNUSED,
27861 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27863 if (TREE_CODE (*node) != FUNCTION_DECL)
27865 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27867 *no_add_attrs = true;
27873 ix86_ms_bitfield_layout_p (const_tree record_type)
27875 return ((TARGET_MS_BITFIELD_LAYOUT
27876 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27877 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27880 /* Returns an expression indicating where the this parameter is
27881 located on entry to the FUNCTION. */
27884 x86_this_parameter (tree function)
27886 tree type = TREE_TYPE (function);
27887 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27892 const int *parm_regs;
27894 if (ix86_function_type_abi (type) == MS_ABI)
27895 parm_regs = x86_64_ms_abi_int_parameter_registers;
27897 parm_regs = x86_64_int_parameter_registers;
27898 return gen_rtx_REG (DImode, parm_regs[aggr]);
27901 nregs = ix86_function_regparm (type, function);
27903 if (nregs > 0 && !stdarg_p (type))
27907 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27908 regno = aggr ? DX_REG : CX_REG;
27909 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27913 return gen_rtx_MEM (SImode,
27914 plus_constant (stack_pointer_rtx, 4));
27923 return gen_rtx_MEM (SImode,
27924 plus_constant (stack_pointer_rtx, 4));
27927 return gen_rtx_REG (SImode, regno);
27930 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
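/* Concretely: on 64-bit targets "this" arrives in the first integer
argument register (the second for aggregate returns), e.g. %rdi/%rsi
for the SysV ABI; with 32-bit fastcall or thiscall it arrives in %ecx;
with regparm it arrives in %eax; otherwise it is on the stack at
4(%esp), or at 8(%esp) when a hidden aggregate-return pointer is
pushed first.  */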
27933 /* Determine whether x86_output_mi_thunk can succeed. */
27936 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27937 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27938 HOST_WIDE_INT vcall_offset, const_tree function)
27940 /* 64-bit can handle anything. */
27944 /* For 32-bit, everything's fine if we have one free register. */
27945 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27948 /* Need a free register for vcall_offset. */
27952 /* Need a free register for GOT references. */
27953 if (flag_pic && !targetm.binds_local_p (function))
27956 /* Otherwise ok. */
27960 /* Output the assembler code for a thunk function. THUNK_DECL is the
27961 declaration for the thunk function itself, FUNCTION is the decl for
27962 the target function. DELTA is an immediate constant offset to be
27963 added to THIS. If VCALL_OFFSET is nonzero, the word at
27964 *(*this + vcall_offset) should be added to THIS. */
27967 x86_output_mi_thunk (FILE *file,
27968 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27969 HOST_WIDE_INT vcall_offset, tree function)
27972 rtx this_param = x86_this_parameter (function);
27975 /* Make sure unwind info is emitted for the thunk if needed. */
27976 final_start_function (emit_barrier (), file, 1);
27978 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27979 pull it in now and let DELTA benefit. */
27980 if (REG_P (this_param))
27981 this_reg = this_param;
27982 else if (vcall_offset)
27984 /* Put the this parameter into %eax. */
27985 xops[0] = this_param;
27986 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27987 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27990 this_reg = NULL_RTX;
27992 /* Adjust the this parameter by a fixed constant. */
27995 xops[0] = GEN_INT (delta);
27996 xops[1] = this_reg ? this_reg : this_param;
27999 if (!x86_64_general_operand (xops[0], DImode))
28001 tmp = gen_rtx_REG (DImode, R10_REG);
28003 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
28005 xops[1] = this_param;
28007 if (x86_maybe_negate_const_int (&xops[0], DImode))
28008 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
28010 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
28012 else if (x86_maybe_negate_const_int (&xops[0], SImode))
28013 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
28015 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
28018 /* Adjust the this parameter by a value stored in the vtable. */
28022 tmp = gen_rtx_REG (DImode, R10_REG);
28025 int tmp_regno = CX_REG;
28026 if (lookup_attribute ("fastcall",
28027 TYPE_ATTRIBUTES (TREE_TYPE (function)))
28028 || lookup_attribute ("thiscall",
28029 TYPE_ATTRIBUTES (TREE_TYPE (function))))
28030 tmp_regno = AX_REG;
28031 tmp = gen_rtx_REG (SImode, tmp_regno);
28034 xops[0] = gen_rtx_MEM (Pmode, this_reg);
28036 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28038 /* Adjust the this parameter. */
28039 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
28040 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
28042 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
28043 xops[0] = GEN_INT (vcall_offset);
28045 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
28046 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
28048 xops[1] = this_reg;
28049 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
28052 /* If necessary, drop THIS back to its stack slot. */
28053 if (this_reg && this_reg != this_param)
28055 xops[0] = this_reg;
28056 xops[1] = this_param;
28057 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28060 xops[0] = XEXP (DECL_RTL (function), 0);
28063 if (!flag_pic || targetm.binds_local_p (function))
28064 output_asm_insn ("jmp\t%P0", xops);
28065 /* All thunks should be in the same object as their target,
28066 and thus binds_local_p should be true. */
28067 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
28068 gcc_unreachable ();
28071 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
28072 tmp = gen_rtx_CONST (Pmode, tmp);
28073 tmp = gen_rtx_MEM (QImode, tmp);
28075 output_asm_insn ("jmp\t%A0", xops);
28080 if (!flag_pic || targetm.binds_local_p (function))
28081 output_asm_insn ("jmp\t%P0", xops);
28086 rtx sym_ref = XEXP (DECL_RTL (function), 0);
28087 if (TARGET_MACHO_BRANCH_ISLANDS)
28088 sym_ref = (gen_rtx_SYMBOL_REF
28090 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
28091 tmp = gen_rtx_MEM (QImode, sym_ref);
28093 output_asm_insn ("jmp\t%0", xops);
28096 #endif /* TARGET_MACHO */
28098 tmp = gen_rtx_REG (SImode, CX_REG);
28099 output_set_got (tmp, NULL_RTX);
28102 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
28103 output_asm_insn ("jmp\t{*}%1", xops);
28106 final_end_function ();
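/* A standalone sketch of the pointer adjustment the thunk performs,
assuming the usual C++ vtable layout; "this_ptr", "delta" and
"vcall_offset" mirror the parameters above, and the helper is purely
illustrative, not part of the compiler.  */
#if 0
static void *
thunk_adjust_sketch (char *this_ptr, long delta, long vcall_offset)
{
  this_ptr += delta;			/* fixed adjustment */
  if (vcall_offset)
    /* *this_ptr is the vtable pointer; add the slot at vcall_offset.  */
    this_ptr += *(long *) (*(char **) this_ptr + vcall_offset);
  return this_ptr;			/* then tail-call FUNCTION */
}
#endif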
28110 x86_file_start (void)
28112 default_file_start ();
28114 darwin_file_start ();
28116 if (X86_FILE_START_VERSION_DIRECTIVE)
28117 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28118 if (X86_FILE_START_FLTUSED)
28119 fputs ("\t.global\t__fltused\n", asm_out_file);
28120 if (ix86_asm_dialect == ASM_INTEL)
28121 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28125 x86_field_alignment (tree field, int computed)
28127 enum machine_mode mode;
28128 tree type = TREE_TYPE (field);
28130 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28132 mode = TYPE_MODE (strip_array_types (type));
28133 if (mode == DFmode || mode == DCmode
28134 || GET_MODE_CLASS (mode) == MODE_INT
28135 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28136 return MIN (32, computed);
28140 /* Output assembler code to FILE to increment profiler label # LABELNO
28141 for profiling a function entry. */
28143 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28145 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28150 #ifndef NO_PROFILE_COUNTERS
28151 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28154 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28155 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28157 fprintf (file, "\tcall\t%s\n", mcount_name);
28161 #ifndef NO_PROFILE_COUNTERS
28162 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28165 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28169 #ifndef NO_PROFILE_COUNTERS
28170 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28173 fprintf (file, "\tcall\t%s\n", mcount_name);
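/* For example, in the 64-bit SysV PIC case the code emitted above is
roughly (assuming the usual ".L" local label prefix and a "mcount"
entry point):

	leaq	.LP0(%rip),%r11
	call	*mcount@GOTPCREL(%rip)

with the leaq omitted when NO_PROFILE_COUNTERS is defined.  */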
/* We don't have exact information about the insn sizes, but we may assume
quite safely that we are informed about all 1 byte insns and memory
address sizes.  This is enough to eliminate unnecessary padding in
99% of cases.  */
28183 min_insn_size (rtx insn)
28187 if (!INSN_P (insn) || !active_insn_p (insn))
/* Discard alignments we've emitted, as well as jump table data.  */
28191 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28192 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28194 if (JUMP_TABLE_DATA_P (insn))
/* Important case - calls are always 5 bytes.
It is common to have many calls in a row.  */
28200 && symbolic_reference_mentioned_p (PATTERN (insn))
28201 && !SIBLING_CALL_P (insn))
28203 len = get_attr_length (insn);
28207 /* For normal instructions we rely on get_attr_length being exact,
28208 with a few exceptions. */
28209 if (!JUMP_P (insn))
28211 enum attr_type type = get_attr_type (insn);
28216 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28217 || asm_noperands (PATTERN (insn)) >= 0)
28224 /* Otherwise trust get_attr_length. */
28228 l = get_attr_length_address (insn);
28229 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28238 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
16-byte window.  */
28244 ix86_avoid_jump_mispredicts (void)
28246 rtx insn, start = get_insns ();
28247 int nbytes = 0, njumps = 0;
/* Look for all minimal intervals of instructions containing 4 jumps.
The intervals are bounded by START and INSN.  NBYTES is the total
size of the instructions in the interval, including INSN but not
START.  When NBYTES is smaller than 16, it is possible that START
and INSN end up in the same 16-byte window.

The smallest offset at which INSN can start is the case where START
ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN),
so we add a p2align to the 16-byte window with maxskip
15 - NBYTES + sizeof (INSN).  */
28260 for (insn = start; insn; insn = NEXT_INSN (insn))
28264 if (LABEL_P (insn))
28266 int align = label_to_alignment (insn);
28267 int max_skip = label_to_max_skip (insn);
/* If align > 3, only up to 16 - max_skip - 1 bytes can be
already in the current 16-byte page, because otherwise
ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
bytes to reach a 16-byte boundary.  */
28276 || (align <= 3 && max_skip != (1 << align) - 1))
28279 fprintf (dump_file, "Label %i with max_skip %i\n",
28280 INSN_UID (insn), max_skip);
28283 while (nbytes + max_skip >= 16)
28285 start = NEXT_INSN (start);
28286 if ((JUMP_P (start)
28287 && GET_CODE (PATTERN (start)) != ADDR_VEC
28288 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28290 njumps--, isjump = 1;
28293 nbytes -= min_insn_size (start);
28299 min_size = min_insn_size (insn);
28300 nbytes += min_size;
28302 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28303 INSN_UID (insn), min_size);
28305 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28306 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28314 start = NEXT_INSN (start);
28315 if ((JUMP_P (start)
28316 && GET_CODE (PATTERN (start)) != ADDR_VEC
28317 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28319 njumps--, isjump = 1;
28322 nbytes -= min_insn_size (start);
28324 gcc_assert (njumps >= 0);
28326 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28327 INSN_UID (start), INSN_UID (insn), nbytes);
28329 if (njumps == 3 && isjump && nbytes < 16)
28331 int padsize = 15 - nbytes + min_insn_size (insn);
28334 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28335 INSN_UID (insn), padsize);
28336 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
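/* Worked example: if the interval START..INSN contains four jumps and
nbytes is 13, then with min_insn_size (insn) == 2 the pad emitted
before INSN is 15 - 13 + 2 = 4 bytes, enough to keep all four jumps
from sharing one 16-byte window even when START ends at offset 0.  */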
/* AMD Athlon works faster
when RET is not the destination of a conditional jump and is not
directly preceded by another jump instruction.  We avoid the penalty
by inserting a NOP just before the RET instructions in such cases.  */
28347 ix86_pad_returns (void)
28352 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28354 basic_block bb = e->src;
28355 rtx ret = BB_END (bb);
28357 bool replace = false;
28359 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28360 || optimize_bb_for_size_p (bb))
28362 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28363 if (active_insn_p (prev) || LABEL_P (prev))
28365 if (prev && LABEL_P (prev))
28370 FOR_EACH_EDGE (e, ei, bb->preds)
28371 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28372 && !(e->flags & EDGE_FALLTHRU))
28377 prev = prev_active_insn (ret);
28379 && ((JUMP_P (prev) && any_condjump_p (prev))
/* Empty functions get a branch mispredict even when the jump destination
is not visible to us.  */
28384 if (!prev && !optimize_function_for_size_p (cfun))
28389 emit_jump_insn_before (gen_return_internal_long (), ret);
28395 /* Count the minimum number of instructions in BB. Return 4 if the
28396 number of instructions >= 4. */
28399 ix86_count_insn_bb (basic_block bb)
28402 int insn_count = 0;
28404 /* Count number of instructions in this block. Return 4 if the number
28405 of instructions >= 4. */
28406 FOR_BB_INSNS (bb, insn)
/* This only happens in exit blocks.  */
28410 && GET_CODE (PATTERN (insn)) == RETURN)
28413 if (NONDEBUG_INSN_P (insn)
28414 && GET_CODE (PATTERN (insn)) != USE
28415 && GET_CODE (PATTERN (insn)) != CLOBBER)
28418 if (insn_count >= 4)
/* Count the minimum number of instructions in the code path ending in BB.
Return 4 if the number of instructions >= 4.  */
28431 ix86_count_insn (basic_block bb)
28435 int min_prev_count;
28437 /* Only bother counting instructions along paths with no
28438 more than 2 basic blocks between entry and exit. Given
28439 that BB has an edge to exit, determine if a predecessor
28440 of BB has an edge from entry. If so, compute the number
28441 of instructions in the predecessor block. If there
28442 happen to be multiple such blocks, compute the minimum. */
28443 min_prev_count = 4;
28444 FOR_EACH_EDGE (e, ei, bb->preds)
28447 edge_iterator prev_ei;
28449 if (e->src == ENTRY_BLOCK_PTR)
28451 min_prev_count = 0;
28454 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28456 if (prev_e->src == ENTRY_BLOCK_PTR)
28458 int count = ix86_count_insn_bb (e->src);
28459 if (count < min_prev_count)
28460 min_prev_count = count;
28466 if (min_prev_count < 4)
28467 min_prev_count += ix86_count_insn_bb (bb);
28469 return min_prev_count;
/* Pad short functions to 4 instructions.  */
28475 ix86_pad_short_function (void)
28480 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28482 rtx ret = BB_END (e->src);
28483 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28485 int insn_count = ix86_count_insn (e->src);
28487 /* Pad short function. */
28488 if (insn_count < 4)
28492 /* Find epilogue. */
28495 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28496 insn = PREV_INSN (insn);
28501 /* Two NOPs are counted as one instruction. */
28502 insn_count = 2 * (4 - insn_count);
28503 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
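/* Worked example: a function with a single counted instruction
(insn_count == 1) is padded with 2 * (4 - 1) = 6 NOPs before its
epilogue; since two NOPs are counted as one instruction, that supplies
the three missing instructions.  */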
/* Implement machine specific optimizations.  We implement padding of returns
for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
28514 if (optimize && optimize_function_for_speed_p (cfun))
28516 if (TARGET_PAD_SHORT_FUNCTION)
28517 ix86_pad_short_function ();
28518 else if (TARGET_PAD_RETURNS)
28519 ix86_pad_returns ();
28520 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28521 if (TARGET_FOUR_JUMP_LIMIT)
28522 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a
REX prefix is used.  */
28530 x86_extended_QIreg_mentioned_p (rtx insn)
28533 extract_insn_cached (insn);
28534 for (i = 0; i < recog_data.n_operands; i++)
28535 if (REG_P (recog_data.operand[i])
28536 && REGNO (recog_data.operand[i]) > BX_REG)
/* Return nonzero when P points to a register encoded via a REX prefix.
Called via for_each_rtx.  */
28544 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28546 unsigned int regno;
28549 regno = REGNO (*p);
28550 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* Return true when INSN mentions a register that must be encoded using a
REX prefix.  */
28556 x86_extended_reg_mentioned_p (rtx insn)
28558 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28559 extended_reg_mentioned_1, NULL);
28562 /* If profitable, negate (without causing overflow) integer constant
28563 of mode MODE at location LOC. Return true in this case. */
28565 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28569 if (!CONST_INT_P (*loc))
28575 /* DImode x86_64 constants must fit in 32 bits. */
28576 gcc_assert (x86_64_immediate_operand (*loc, mode));
28587 gcc_unreachable ();
28590 /* Avoid overflows. */
28591 if (mode_signbit_p (mode, *loc))
28594 val = INTVAL (*loc);
28596 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
28597 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
28598 if ((val < 0 && val != -128)
28601 *loc = GEN_INT (-val);
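/* Worked example of the exception: "add $128" is rewritten as
"sub $-128", since -128 encodes in a sign-extended byte while +128
needs a full immediate; conversely "add $-128" is kept, because
"sub $128" would encode larger.  */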
28608 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28609 optabs would emit if we didn't have TFmode patterns. */
28612 x86_emit_floatuns (rtx operands[2])
28614 rtx neglab, donelab, i0, i1, f0, in, out;
28615 enum machine_mode mode, inmode;
28617 inmode = GET_MODE (operands[1]);
28618 gcc_assert (inmode == SImode || inmode == DImode);
28621 in = force_reg (inmode, operands[1]);
28622 mode = GET_MODE (out);
28623 neglab = gen_label_rtx ();
28624 donelab = gen_label_rtx ();
28625 f0 = gen_reg_rtx (mode);
28627 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28629 expand_float (out, in, 0);
28631 emit_jump_insn (gen_jump (donelab));
28634 emit_label (neglab);
28636 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28638 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28640 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28642 expand_float (f0, i0, 0);
28644 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28646 emit_label (donelab);
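/* A standalone sketch of the same trick for DImode inputs, assuming a
64-bit "long long"; purely illustrative, not used by the compiler.  */
#if 0
static double
floatuns_sketch (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;	/* sign bit clear: convert directly */

  /* Halve the value, or-ing the lost low bit back in as a rounding bit,
     convert as signed, then double the result.  */
  unsigned long long half = (u >> 1) | (u & 1);
  double d = (double) (long long) half;
  return d + d;
}
#endif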
28649 /* AVX does not support 32-byte integer vector operations,
28650 thus the longest vector we are faced with is V16QImode. */
28651 #define MAX_VECT_LEN 16
28653 struct expand_vec_perm_d
28655 rtx target, op0, op1;
28656 unsigned char perm[MAX_VECT_LEN];
28657 enum machine_mode vmode;
28658 unsigned char nelt;
28662 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28663 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28665 /* Get a vector mode of the same size as the original but with elements
28666 twice as wide. This is only guaranteed to apply to integral vectors. */
28668 static inline enum machine_mode
28669 get_mode_wider_vector (enum machine_mode o)
28671 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28672 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28673 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28674 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28678 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28679 with all elements equal to VAR. Return true if successful. */
28682 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28683 rtx target, rtx val)
28706 /* First attempt to recognize VAL as-is. */
28707 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28708 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28709 if (recog_memoized (insn) < 0)
28712 /* If that fails, force VAL into a register. */
28715 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28716 seq = get_insns ();
28719 emit_insn_before (seq, insn);
28721 ok = recog_memoized (insn) >= 0;
28730 if (TARGET_SSE || TARGET_3DNOW_A)
28734 val = gen_lowpart (SImode, val);
28735 x = gen_rtx_TRUNCATE (HImode, val);
28736 x = gen_rtx_VEC_DUPLICATE (mode, x);
28737 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28750 struct expand_vec_perm_d dperm;
28754 memset (&dperm, 0, sizeof (dperm));
28755 dperm.target = target;
28756 dperm.vmode = mode;
28757 dperm.nelt = GET_MODE_NUNITS (mode);
28758 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28760 /* Extend to SImode using a paradoxical SUBREG. */
28761 tmp1 = gen_reg_rtx (SImode);
28762 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28764 /* Insert the SImode value as low element of a V4SImode vector. */
28765 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28766 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28768 ok = (expand_vec_perm_1 (&dperm)
28769 || expand_vec_perm_broadcast_1 (&dperm));
28781 /* Replicate the value once into the next wider mode and recurse. */
28783 enum machine_mode smode, wsmode, wvmode;
28786 smode = GET_MODE_INNER (mode);
28787 wvmode = get_mode_wider_vector (mode);
28788 wsmode = GET_MODE_INNER (wvmode);
28790 val = convert_modes (wsmode, smode, val, true);
28791 x = expand_simple_binop (wsmode, ASHIFT, val,
28792 GEN_INT (GET_MODE_BITSIZE (smode)),
28793 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28794 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28796 x = gen_lowpart (wvmode, target);
28797 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28805 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28806 rtx x = gen_reg_rtx (hvmode);
28808 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28811 x = gen_rtx_VEC_CONCAT (mode, x, x);
28812 emit_insn (gen_rtx_SET (VOIDmode, target, x));
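/* Worked example for the QImode/HImode widening path above: to splat
the QImode value 0xab, it is first widened to HImode as
0xab | (0xab << 8) == 0xabab, and the broadcast then recurses on the
half-length HImode vector.  */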
28821 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
whose ONE_VAR element is VAR, and other elements are zero.  Return true
if successful.  */
28826 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28827 rtx target, rtx var, int one_var)
28829 enum machine_mode vsimode;
28832 bool use_vector_set = false;
/* For SSE4.1, we normally use vector set.  But if the second
element is zero and inter-unit moves are OK, we use movq
instead.  */
28840 use_vector_set = (TARGET_64BIT
28842 && !(TARGET_INTER_UNIT_MOVES
28848 use_vector_set = TARGET_SSE4_1;
28851 use_vector_set = TARGET_SSE2;
28854 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28861 use_vector_set = TARGET_AVX;
28864 /* Use ix86_expand_vector_set in 64bit mode only. */
28865 use_vector_set = TARGET_AVX && TARGET_64BIT;
28871 if (use_vector_set)
28873 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28874 var = force_reg (GET_MODE_INNER (mode), var);
28875 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28891 var = force_reg (GET_MODE_INNER (mode), var);
28892 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28893 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28898 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28899 new_target = gen_reg_rtx (mode);
28901 new_target = target;
28902 var = force_reg (GET_MODE_INNER (mode), var);
28903 x = gen_rtx_VEC_DUPLICATE (mode, var);
28904 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28905 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28908 /* We need to shuffle the value to the correct position, so
28909 create a new pseudo to store the intermediate result. */
28911 /* With SSE2, we can use the integer shuffle insns. */
28912 if (mode != V4SFmode && TARGET_SSE2)
28914 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28916 GEN_INT (one_var == 1 ? 0 : 1),
28917 GEN_INT (one_var == 2 ? 0 : 1),
28918 GEN_INT (one_var == 3 ? 0 : 1)));
28919 if (target != new_target)
28920 emit_move_insn (target, new_target);
28924 /* Otherwise convert the intermediate result to V4SFmode and
28925 use the SSE1 shuffle instructions. */
28926 if (mode != V4SFmode)
28928 tmp = gen_reg_rtx (V4SFmode);
28929 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28934 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28936 GEN_INT (one_var == 1 ? 0 : 1),
28937 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28938 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28940 if (mode != V4SFmode)
28941 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28942 else if (tmp != target)
28943 emit_move_insn (target, tmp);
28945 else if (target != new_target)
28946 emit_move_insn (target, new_target);
28951 vsimode = V4SImode;
28957 vsimode = V2SImode;
28963 /* Zero extend the variable element to SImode and recurse. */
28964 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28966 x = gen_reg_rtx (vsimode);
28967 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28969 gcc_unreachable ();
28971 emit_move_insn (target, gen_lowpart (mode, x));
28979 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28980 consisting of the values in VALS. It is known that all elements
28981 except ONE_VAR are constants. Return true if successful. */
28984 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28985 rtx target, rtx vals, int one_var)
28987 rtx var = XVECEXP (vals, 0, one_var);
28988 enum machine_mode wmode;
28991 const_vec = copy_rtx (vals);
28992 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28993 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
29001 /* For the two element vectors, it's just as easy to use
29002 the general case. */
29006 /* Use ix86_expand_vector_set in 64bit mode only. */
29029 /* There's no way to set one QImode entry easily. Combine
29030 the variable value with its adjacent constant value, and
29031 promote to an HImode set. */
29032 x = XVECEXP (vals, 0, one_var ^ 1);
29035 var = convert_modes (HImode, QImode, var, true);
29036 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
29037 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29038 x = GEN_INT (INTVAL (x) & 0xff);
29042 var = convert_modes (HImode, QImode, var, true);
29043 x = gen_int_mode (INTVAL (x) << 8, HImode);
29045 if (x != const0_rtx)
29046 var = expand_simple_binop (HImode, IOR, var, x, var,
29047 1, OPTAB_LIB_WIDEN);
29049 x = gen_reg_rtx (wmode);
29050 emit_move_insn (x, gen_lowpart (wmode, const_vec));
29051 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
29053 emit_move_insn (target, gen_lowpart (mode, x));
29060 emit_move_insn (target, const_vec);
29061 ix86_expand_vector_set (mmx_ok, target, var, one_var);
29065 /* A subroutine of ix86_expand_vector_init_general. Use vector
29066 concatenate to handle the most general case: all values variable,
29067 and none identical. */
29070 ix86_expand_vector_init_concat (enum machine_mode mode,
29071 rtx target, rtx *ops, int n)
29073 enum machine_mode cmode, hmode = VOIDmode;
29074 rtx first[8], second[4];
29114 gcc_unreachable ();
29117 if (!register_operand (ops[1], cmode))
29118 ops[1] = force_reg (cmode, ops[1]);
29119 if (!register_operand (ops[0], cmode))
29120 ops[0] = force_reg (cmode, ops[0]);
29121 emit_insn (gen_rtx_SET (VOIDmode, target,
29122 gen_rtx_VEC_CONCAT (mode, ops[0],
29142 gcc_unreachable ();
29158 gcc_unreachable ();
29163 /* FIXME: We process inputs backward to help RA. PR 36222. */
29166 for (; i > 0; i -= 2, j--)
29168 first[j] = gen_reg_rtx (cmode);
29169 v = gen_rtvec (2, ops[i - 1], ops[i]);
29170 ix86_expand_vector_init (false, first[j],
29171 gen_rtx_PARALLEL (cmode, v));
29177 gcc_assert (hmode != VOIDmode);
29178 for (i = j = 0; i < n; i += 2, j++)
29180 second[j] = gen_reg_rtx (hmode);
29181 ix86_expand_vector_init_concat (hmode, second [j],
29185 ix86_expand_vector_init_concat (mode, target, second, n);
29188 ix86_expand_vector_init_concat (mode, target, first, n);
29192 gcc_unreachable ();
29196 /* A subroutine of ix86_expand_vector_init_general. Use vector
29197 interleave to handle the most general case: all values variable,
29198 and none identical. */
29201 ix86_expand_vector_init_interleave (enum machine_mode mode,
29202 rtx target, rtx *ops, int n)
29204 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29207 rtx (*gen_load_even) (rtx, rtx, rtx);
29208 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29209 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29214 gen_load_even = gen_vec_setv8hi;
29215 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29216 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29217 inner_mode = HImode;
29218 first_imode = V4SImode;
29219 second_imode = V2DImode;
29220 third_imode = VOIDmode;
29223 gen_load_even = gen_vec_setv16qi;
29224 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29225 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29226 inner_mode = QImode;
29227 first_imode = V8HImode;
29228 second_imode = V4SImode;
29229 third_imode = V2DImode;
29232 gcc_unreachable ();
29235 for (i = 0; i < n; i++)
/* Extend the odd element to SImode using a paradoxical SUBREG.  */
29238 op0 = gen_reg_rtx (SImode);
29239 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29241 /* Insert the SImode value as low element of V4SImode vector. */
29242 op1 = gen_reg_rtx (V4SImode);
29243 op0 = gen_rtx_VEC_MERGE (V4SImode,
29244 gen_rtx_VEC_DUPLICATE (V4SImode,
29246 CONST0_RTX (V4SImode),
29248 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
/* Cast the V4SImode vector back to a vector in the original mode.  */
29251 op0 = gen_reg_rtx (mode);
29252 emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load the even elements into the second position.  */
29255 emit_insn (gen_load_even (op0,
29256 force_reg (inner_mode,
29260 /* Cast vector to FIRST_IMODE vector. */
29261 ops[i] = gen_reg_rtx (first_imode);
29262 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29265 /* Interleave low FIRST_IMODE vectors. */
29266 for (i = j = 0; i < n; i += 2, j++)
29268 op0 = gen_reg_rtx (first_imode);
29269 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29271 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29272 ops[j] = gen_reg_rtx (second_imode);
29273 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29276 /* Interleave low SECOND_IMODE vectors. */
29277 switch (second_imode)
29280 for (i = j = 0; i < n / 2; i += 2, j++)
29282 op0 = gen_reg_rtx (second_imode);
29283 emit_insn (gen_interleave_second_low (op0, ops[i],
/* Cast the SECOND_IMODE vector to the THIRD_IMODE
vector.  */
29288 ops[j] = gen_reg_rtx (third_imode);
29289 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29291 second_imode = V2DImode;
29292 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29296 op0 = gen_reg_rtx (second_imode);
29297 emit_insn (gen_interleave_second_low (op0, ops[0],
/* Cast the SECOND_IMODE vector back to a vector in the original
mode.  */
29302 emit_insn (gen_rtx_SET (VOIDmode, target,
29303 gen_lowpart (mode, op0)));
29307 gcc_unreachable ();
29311 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29312 all values variable, and none identical. */
29315 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29316 rtx target, rtx vals)
29318 rtx ops[32], op0, op1;
29319 enum machine_mode half_mode = VOIDmode;
29326 if (!mmx_ok && !TARGET_SSE)
29338 n = GET_MODE_NUNITS (mode);
29339 for (i = 0; i < n; i++)
29340 ops[i] = XVECEXP (vals, 0, i);
29341 ix86_expand_vector_init_concat (mode, target, ops, n);
29345 half_mode = V16QImode;
29349 half_mode = V8HImode;
29353 n = GET_MODE_NUNITS (mode);
29354 for (i = 0; i < n; i++)
29355 ops[i] = XVECEXP (vals, 0, i);
29356 op0 = gen_reg_rtx (half_mode);
29357 op1 = gen_reg_rtx (half_mode);
29358 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29360 ix86_expand_vector_init_interleave (half_mode, op1,
29361 &ops [n >> 1], n >> 2);
29362 emit_insn (gen_rtx_SET (VOIDmode, target,
29363 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29367 if (!TARGET_SSE4_1)
29375 /* Don't use ix86_expand_vector_init_interleave if we can't
29376 move from GPR to SSE register directly. */
29377 if (!TARGET_INTER_UNIT_MOVES)
29380 n = GET_MODE_NUNITS (mode);
29381 for (i = 0; i < n; i++)
29382 ops[i] = XVECEXP (vals, 0, i);
29383 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29391 gcc_unreachable ();
29395 int i, j, n_elts, n_words, n_elt_per_word;
29396 enum machine_mode inner_mode;
29397 rtx words[4], shift;
29399 inner_mode = GET_MODE_INNER (mode);
29400 n_elts = GET_MODE_NUNITS (mode);
29401 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29402 n_elt_per_word = n_elts / n_words;
29403 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29405 for (i = 0; i < n_words; ++i)
29407 rtx word = NULL_RTX;
29409 for (j = 0; j < n_elt_per_word; ++j)
29411 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29412 elt = convert_modes (word_mode, inner_mode, elt, true);
29418 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29419 word, 1, OPTAB_LIB_WIDEN);
29420 word = expand_simple_binop (word_mode, IOR, word, elt,
29421 word, 1, OPTAB_LIB_WIDEN);
29429 emit_move_insn (target, gen_lowpart (mode, words[0]));
29430 else if (n_words == 2)
29432 rtx tmp = gen_reg_rtx (mode);
29433 emit_clobber (tmp);
29434 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29435 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29436 emit_move_insn (target, tmp);
29438 else if (n_words == 4)
29440 rtx tmp = gen_reg_rtx (V4SImode);
29441 gcc_assert (word_mode == SImode);
29442 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29443 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29444 emit_move_insn (target, gen_lowpart (mode, tmp));
29447 gcc_unreachable ();
29451 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29452 instructions unless MMX_OK is true. */
29455 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29457 enum machine_mode mode = GET_MODE (target);
29458 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29459 int n_elts = GET_MODE_NUNITS (mode);
29460 int n_var = 0, one_var = -1;
29461 bool all_same = true, all_const_zero = true;
29465 for (i = 0; i < n_elts; ++i)
29467 x = XVECEXP (vals, 0, i);
29468 if (!(CONST_INT_P (x)
29469 || GET_CODE (x) == CONST_DOUBLE
29470 || GET_CODE (x) == CONST_FIXED))
29471 n_var++, one_var = i;
29472 else if (x != CONST0_RTX (inner_mode))
29473 all_const_zero = false;
29474 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29478 /* Constants are best loaded from the constant pool. */
29481 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29485 /* If all values are identical, broadcast the value. */
29487 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29488 XVECEXP (vals, 0, 0)))
29491 /* Values where only one field is non-constant are best loaded from
29492 the pool and overwritten via move later. */
29496 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29497 XVECEXP (vals, 0, one_var),
29501 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29505 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29509 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29511 enum machine_mode mode = GET_MODE (target);
29512 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29513 enum machine_mode half_mode;
29514 bool use_vec_merge = false;
29516 static rtx (*gen_extract[6][2]) (rtx, rtx)
29518 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29519 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29520 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29521 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29522 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29523 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29525 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29527 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29528 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29529 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29530 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29531 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29532 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29542 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29543 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29545 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29547 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29548 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29554 use_vec_merge = TARGET_SSE4_1;
29562 /* For the two element vectors, we implement a VEC_CONCAT with
29563 the extraction of the other element. */
29565 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29566 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29569 op0 = val, op1 = tmp;
29571 op0 = tmp, op1 = val;
29573 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29574 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29579 use_vec_merge = TARGET_SSE4_1;
29586 use_vec_merge = true;
29590 /* tmp = target = A B C D */
29591 tmp = copy_to_reg (target);
29592 /* target = A A B B */
29593 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29594 /* target = X A B B */
29595 ix86_expand_vector_set (false, target, val, 0);
29596 /* target = A X C D */
29597 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29598 const1_rtx, const0_rtx,
29599 GEN_INT (2+4), GEN_INT (3+4)));
29603 /* tmp = target = A B C D */
29604 tmp = copy_to_reg (target);
29605 /* tmp = X B C D */
29606 ix86_expand_vector_set (false, tmp, val, 0);
29607 /* target = A B X D */
29608 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29609 const0_rtx, const1_rtx,
29610 GEN_INT (0+4), GEN_INT (3+4)));
29614 /* tmp = target = A B C D */
29615 tmp = copy_to_reg (target);
29616 /* tmp = X B C D */
29617 ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B C X */
29619 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29620 const0_rtx, const1_rtx,
29621 GEN_INT (2+4), GEN_INT (0+4)));
29625 gcc_unreachable ();
29630 use_vec_merge = TARGET_SSE4_1;
29634 /* Element 0 handled by vec_merge below. */
29637 use_vec_merge = true;
29643 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29644 store into element 0, then shuffle them back. */
29648 order[0] = GEN_INT (elt);
29649 order[1] = const1_rtx;
29650 order[2] = const2_rtx;
29651 order[3] = GEN_INT (3);
29652 order[elt] = const0_rtx;
29654 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29655 order[1], order[2], order[3]));
29657 ix86_expand_vector_set (false, target, val, 0);
29659 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29660 order[1], order[2], order[3]));
29664 /* For SSE1, we have to reuse the V4SF code. */
29665 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29666 gen_lowpart (SFmode, val), elt);
29671 use_vec_merge = TARGET_SSE2;
29674 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29678 use_vec_merge = TARGET_SSE4_1;
29685 half_mode = V16QImode;
29691 half_mode = V8HImode;
29697 half_mode = V4SImode;
29703 half_mode = V2DImode;
29709 half_mode = V4SFmode;
29715 half_mode = V2DFmode;
29721 /* Compute offset. */
29725 gcc_assert (i <= 1);
29727 /* Extract the half. */
29728 tmp = gen_reg_rtx (half_mode);
29729 emit_insn (gen_extract[j][i] (tmp, target));
29731 /* Put val in tmp at elt. */
29732 ix86_expand_vector_set (false, tmp, val, elt);
29735 emit_insn (gen_insert[j][i] (target, target, tmp));
29744 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29745 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29746 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29750 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29752 emit_move_insn (mem, target);
29754 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29755 emit_move_insn (tmp, val);
29757 emit_move_insn (target, mem);
29762 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29764 enum machine_mode mode = GET_MODE (vec);
29765 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29766 bool use_vec_extr = false;
29779 use_vec_extr = true;
29783 use_vec_extr = TARGET_SSE4_1;
29795 tmp = gen_reg_rtx (mode);
29796 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29797 GEN_INT (elt), GEN_INT (elt),
29798 GEN_INT (elt+4), GEN_INT (elt+4)));
29802 tmp = gen_reg_rtx (mode);
29803 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29807 gcc_unreachable ();
29810 use_vec_extr = true;
29815 use_vec_extr = TARGET_SSE4_1;
29829 tmp = gen_reg_rtx (mode);
29830 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29831 GEN_INT (elt), GEN_INT (elt),
29832 GEN_INT (elt), GEN_INT (elt)));
29836 tmp = gen_reg_rtx (mode);
29837 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29841 gcc_unreachable ();
29844 use_vec_extr = true;
29849 /* For SSE1, we have to reuse the V4SF code. */
29850 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29851 gen_lowpart (V4SFmode, vec), elt);
29857 use_vec_extr = TARGET_SSE2;
29860 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29864 use_vec_extr = TARGET_SSE4_1;
29868 /* ??? Could extract the appropriate HImode element and shift. */
29875 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29876 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29878 /* Let the rtl optimizers know about the zero extension performed. */
29879 if (inner_mode == QImode || inner_mode == HImode)
29881 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29882 target = gen_lowpart (SImode, target);
29885 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29889 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29891 emit_move_insn (mem, vec);
29893 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29894 emit_move_insn (target, tmp);
29898 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29899 pattern to reduce; DEST is the destination; IN is the input vector. */
29902 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29904 rtx tmp1, tmp2, tmp3;
29906 tmp1 = gen_reg_rtx (V4SFmode);
29907 tmp2 = gen_reg_rtx (V4SFmode);
29908 tmp3 = gen_reg_rtx (V4SFmode);
29910 emit_insn (gen_sse_movhlps (tmp1, in, in));
29911 emit_insn (fn (tmp2, tmp1, in));
29913 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29914 const1_rtx, const1_rtx,
29915 GEN_INT (1+4), GEN_INT (1+4)));
29916 emit_insn (fn (dest, tmp2, tmp3));
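/* Worked example with FN = addition and IN = {a, b, c, d}: movhlps
gives tmp1 = {c, d, c, d}; the first FN gives tmp2 = {a+c, b+d, ...};
the shufps replicates element 1, so tmp3 = {b+d, b+d, b+d, b+d}; the
final FN leaves the full reduction a+c+b+d in element 0 of DEST.  */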
29919 /* Target hook for scalar_mode_supported_p. */
29921 ix86_scalar_mode_supported_p (enum machine_mode mode)
29923 if (DECIMAL_FLOAT_MODE_P (mode))
29924 return default_decimal_float_supported_p ();
29925 else if (mode == TFmode)
29928 return default_scalar_mode_supported_p (mode);
29931 /* Implements target hook vector_mode_supported_p. */
29933 ix86_vector_mode_supported_p (enum machine_mode mode)
29935 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29937 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29939 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29941 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29943 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29948 /* Target hook for c_mode_for_suffix. */
29949 static enum machine_mode
29950 ix86_c_mode_for_suffix (char suffix)
29960 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29962 We do this in the new i386 backend to maintain source compatibility
29963 with the old cc0-based compiler. */
29966 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29967 tree inputs ATTRIBUTE_UNUSED,
29970 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29972 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
/* Implements the target hook targetm.asm.encode_section_info.  This
is not used by NetWare.  */
29980 static void ATTRIBUTE_UNUSED
29981 ix86_encode_section_info (tree decl, rtx rtl, int first)
29983 default_encode_section_info (decl, rtl, first);
29985 if (TREE_CODE (decl) == VAR_DECL
29986 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29987 && ix86_in_large_data_p (decl))
29988 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29991 /* Worker function for REVERSE_CONDITION. */
29994 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29996 return (mode != CCFPmode && mode != CCFPUmode
29997 ? reverse_condition (code)
29998 : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
to OPERANDS[0].  */
30005 output_387_reg_move (rtx insn, rtx *operands)
30007 if (REG_P (operands[0]))
30009 if (REG_P (operands[1])
30010 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
30012 if (REGNO (operands[0]) == FIRST_STACK_REG)
30013 return output_387_ffreep (operands, 0);
30014 return "fstp\t%y0";
30016 if (STACK_TOP_P (operands[0]))
30017 return "fld%Z1\t%y1";
30020 else if (MEM_P (operands[0]))
30022 gcc_assert (REG_P (operands[1]));
30023 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
30024 return "fstp%Z0\t%y0";
30027 /* There is no non-popping store to memory for XFmode.
30028 So if we need one, follow the store with a load. */
30029 if (GET_MODE (operands[0]) == XFmode)
30030 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
30032 return "fst%Z0\t%y0";
30039 /* Output code to perform a conditional jump to LABEL, if C2 flag in
30040 FP status register is set. */
30043 ix86_emit_fp_unordered_jump (rtx label)
30045 rtx reg = gen_reg_rtx (HImode);
30048 emit_insn (gen_x86_fnstsw_1 (reg));
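  /* REG now holds the i387 status word; C2 is bit 10.  With sahf the
     condition bits move into EFLAGS (C2 lands in PF, hence the
     UNORDERED test below); without it we test bit 10 directly, via
     the 0x04 mask applied to the high byte of the status word.  */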
30050 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
30052 emit_insn (gen_x86_sahf_1 (reg));
30054 temp = gen_rtx_REG (CCmode, FLAGS_REG);
30055 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
30059 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
30061 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
30062 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
30065 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
30066 gen_rtx_LABEL_REF (VOIDmode, label),
30068 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
30070 emit_jump_insn (temp);
30071 predict_jump (REG_BR_PROB_BASE * 10 / 100);
30074 /* Output code to perform a log1p XFmode calculation. */
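/* The expansion relies on the i387 fyl2xp1 instruction, which computes
   y * log2(x + 1) but is only specified for |x| < 1 - sqrt(2)/2, about
   0.29289; that is exactly the threshold tested below.  Larger inputs
   take the fyl2x path on 1 + x instead, and in both cases the fldln2
   constant (ln 2) turns the base-2 logarithm into a natural log.  */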
30076 void ix86_emit_i387_log1p (rtx op0, rtx op1)
30078 rtx label1 = gen_label_rtx ();
30079 rtx label2 = gen_label_rtx ();
30081 rtx tmp = gen_reg_rtx (XFmode);
30082 rtx tmp2 = gen_reg_rtx (XFmode);
30085 emit_insn (gen_absxf2 (tmp, op1));
30086 test = gen_rtx_GE (VOIDmode, tmp,
30087 CONST_DOUBLE_FROM_REAL_VALUE (
		     REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
		     XFmode));
30090 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
30092 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30093 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
30094 emit_jump (label2);
30096 emit_label (label1);
30097 emit_move_insn (tmp, CONST1_RTX (XFmode));
30098 emit_insn (gen_addxf3 (tmp, op1, tmp));
30099 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30100 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
30102 emit_label (label2);
/* Output code to perform a Newton-Raphson approximation of a single
   precision floating point divide.  */
30108 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
30110 rtx x0, x1, e0, e1, two;
30112 x0 = gen_reg_rtx (mode);
30113 e0 = gen_reg_rtx (mode);
30114 e1 = gen_reg_rtx (mode);
30115 x1 = gen_reg_rtx (mode);
30117 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30119 if (VECTOR_MODE_P (mode))
30120 two = ix86_build_const_vector (mode, true, two);
30122 two = force_reg (mode, two);
30124 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
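  /* This is one Newton-Raphson step for f(x) = 1/x - b: starting from
     the hardware estimate x0 = rcp(b), the refined reciprocal is
     x0 * (2.0 - b * x0).  Multiplying through by a and reassociating
     into (a * x0) * (2.0 - b * x0) lets the two inner products below
     issue independently.  */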
30126 /* x0 = rcp(b) estimate */
30127 emit_insn (gen_rtx_SET (VOIDmode, x0,
30128 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30131 emit_insn (gen_rtx_SET (VOIDmode, e0,
30132 gen_rtx_MULT (mode, x0, a)));
30134 emit_insn (gen_rtx_SET (VOIDmode, e1,
30135 gen_rtx_MULT (mode, x0, b)));
30137 emit_insn (gen_rtx_SET (VOIDmode, x1,
30138 gen_rtx_MINUS (mode, two, e1)));
30139 /* res = e0 * x1 */
30140 emit_insn (gen_rtx_SET (VOIDmode, res,
30141 gen_rtx_MULT (mode, e0, x1)));
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
30150 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30153 x0 = gen_reg_rtx (mode);
30154 e0 = gen_reg_rtx (mode);
30155 e1 = gen_reg_rtx (mode);
30156 e2 = gen_reg_rtx (mode);
30157 e3 = gen_reg_rtx (mode);
30159 real_from_integer (&r, VOIDmode, -3, -1, 0);
30160 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30162 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30163 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30165 if (VECTOR_MODE_P (mode))
30167 mthree = ix86_build_const_vector (mode, true, mthree);
30168 mhalf = ix86_build_const_vector (mode, true, mhalf);
30171 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30172 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
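  /* Both forms are the Newton-Raphson step for f(x) = 1/x**2 - a,
     x1 = 0.5 * x0 * (3.0 - a * x0 * x0), with the signs folded into
     the -0.5 and -3.0 constants materialized above; the sqrt variant
     simply carries an extra factor of a.  */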
30174 /* x0 = rsqrt(a) estimate */
30175 emit_insn (gen_rtx_SET (VOIDmode, x0,
30176 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
  /* If a == 0.0, filter out the infinite rsqrt estimate to avoid
     generating a NaN when computing sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }
30196 emit_insn (gen_rtx_SET (VOIDmode, e0,
30197 gen_rtx_MULT (mode, x0, a)));
30199 emit_insn (gen_rtx_SET (VOIDmode, e1,
30200 gen_rtx_MULT (mode, e0, x0)));
30203 mthree = force_reg (mode, mthree);
30204 emit_insn (gen_rtx_SET (VOIDmode, e2,
30205 gen_rtx_PLUS (mode, e1, mthree)));
30207 mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* res = e2 * e3 */
30217 emit_insn (gen_rtx_SET (VOIDmode, res,
30218 gen_rtx_MULT (mode, e2, e3)));
30221 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30223 static void ATTRIBUTE_UNUSED
30224 i386_solaris_elf_named_section (const char *name, unsigned int flags,
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
30233 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30234 flags & SECTION_WRITE ? "aw" : "a");
30237 default_elf_asm_named_section (name, flags, decl);
30240 /* Return the mangling of TYPE if it is an extended fundamental type. */
30242 static const char *
30243 ix86_mangle_type (const_tree type)
30245 type = TYPE_MAIN_VARIANT (type);
30247 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30248 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30251 switch (TYPE_MODE (type))
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
30264 /* For 32-bit code we can save PIC register setup by using
30265 __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   register, so it is better to call __stack_chk_fail directly.  */
30270 ix86_stack_protect_fail (void)
30272 return TARGET_64BIT
30273 ? default_external_stack_protect_fail ()
30274 : default_hidden_stack_protect_fail ();
30277 /* Select a format to encode pointers in exception handling data. CODE
30278 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30279 true if the symbol may be affected by dynamic relocations.
30281 ??? All x86 object file formats are capable of representing this.
30282 After all, the relocation needed is the same as for the call insn.
30283 Whether or not a particular assembler allows us to enter such, I
30284 guess we'll have to see. */
30286 asm_preferred_eh_data_format (int code, int global)
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
30297 if (ix86_cmodel == CM_SMALL
30298 || (ix86_cmodel == CM_MEDIUM && code))
30299 return DW_EH_PE_udata4;
30300 return DW_EH_PE_absptr;
30303 /* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
30307 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30309 enum machine_mode mode = GET_MODE (sign);
30310 rtx sgn = gen_reg_rtx (mode);
30311 if (mask == NULL_RTX)
30313 enum machine_mode vmode;
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;
30322 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
30323 if (!VECTOR_MODE_P (mode))
30325 /* We need to generate a scalar mode mask in this case. */
30326 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30327 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30328 mask = gen_reg_rtx (mode);
30329 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30333 mask = gen_rtx_NOT (mode, mask);
30334 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30335 gen_rtx_AND (mode, mask, sign)));
30336 emit_insn (gen_rtx_SET (VOIDmode, result,
30337 gen_rtx_IOR (mode, abs_value, sgn)));
30340 /* Expand fabs (OP0) and return a new rtx that holds the result. The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
30344 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30346 enum machine_mode vmode, mode = GET_MODE (op0);
30349 xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
30356 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
30357 if (!VECTOR_MODE_P (mode))
30359 /* We need to generate a scalar mode mask in this case. */
30360 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30361 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30362 mask = gen_reg_rtx (mode);
30363 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30365 emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
30374 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30375 swapping the operands if SWAP_OPERANDS is true. The expanded
30376 code is a forward jump to a newly created label in case the
30377 comparison is true. The generated label rtx is returned. */
30379 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30380 bool swap_operands)
30391 label = gen_label_rtx ();
30392 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30393 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30394 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30395 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30396 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30397 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30398 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30399 JUMP_LABEL (tmp) = label;
30404 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30405 using comparison code CODE. Operands are swapped for the comparison if
30406 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30408 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30409 bool swap_operands)
30411 enum machine_mode mode = GET_MODE (op0);
30412 rtx mask = gen_reg_rtx (mode);
30421 if (mode == DFmode)
30422 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30423 gen_rtx_fmt_ee (code, mode, op0, op1)));
30425 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30426 gen_rtx_fmt_ee (code, mode, op0, op1)));
30431 /* Generate and return a rtx of mode MODE for 2**n where n is the number
30432 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
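/* Adding and then subtracting this constant is the classic rounding
   trick: for |x| < 2**p the addition pushes all fractional bits out of
   the p-bit mantissa, the FPU's round-to-nearest mode discards them,
   and the subtraction leaves the rounded integral value.  */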
30434 ix86_gen_TWO52 (enum machine_mode mode)
30436 REAL_VALUE_TYPE TWO52r;
30439 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30440 TWO52 = const_double_from_real_value (TWO52r, mode);
30441 TWO52 = force_reg (mode, TWO52);
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
30449 ix86_expand_lround (rtx op0, rtx op1)
30451 /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp;
   */
30455 enum machine_mode mode = GET_MODE (op1);
30456 const struct real_format *fmt;
30457 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30460 /* load nextafter (0.5, 0.0) */
30461 fmt = REAL_MODE_FORMAT (mode);
30462 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30463 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
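  /* pred_half is therefore the largest representable value below 0.5,
     i.e. 0.5 - 2**(-p-1) for a p-bit mantissa.  Adding it instead of
     exactly 0.5 keeps inputs just below a .5 boundary, such as the
     predecessor of 0.5 itself, from being rounded upward.  */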
30465 /* adj = copysign (0.5, op1) */
30466 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30467 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30469 /* adj = op1 + adj */
30470 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30472 /* op0 = (imode)adj */
30473 expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
30479 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
30481 /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
30486 enum machine_mode fmode = GET_MODE (op1);
30487 enum machine_mode imode = GET_MODE (op0);
30488 rtx ireg, freg, label, tmp;
30490 /* reg = (long)op1 */
30491 ireg = gen_reg_rtx (imode);
30492 expand_fix (ireg, op1, 0);
30494 /* freg = (double)reg */
30495 freg = gen_reg_rtx (fmode);
30496 expand_float (freg, ireg, 0);
30498 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30499 label = ix86_expand_sse_compare_and_jump (UNLE,
30500 freg, op1, !do_floor);
30501 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30502 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30503 emit_move_insn (ireg, tmp);
30505 emit_label (label);
30506 LABEL_NUSES (label) = 1;
30508 emit_move_insn (op0, ireg);
30511 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30512 result in OPERAND0. */
30514 ix86_expand_rint (rtx operand0, rtx operand1)
30516 /* C code for the stuff we're doing below:
30517 xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
30523 enum machine_mode mode = GET_MODE (operand0);
30524 rtx res, xa, label, TWO52, mask;
30526 res = gen_reg_rtx (mode);
30527 emit_move_insn (res, operand1);
30529 /* xa = abs (operand1) */
30530 xa = ix86_expand_sse_fabs (res, &mask);
30532 /* if (!isless (xa, TWO52)) goto label; */
30533 TWO52 = ix86_gen_TWO52 (mode);
30534 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30536 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30537 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30539 ix86_sse_copysign_to_positive (res, xa, res, mask);
30541 emit_label (label);
30542 LABEL_NUSES (label) = 1;
30544 emit_move_insn (operand0, res);
30547 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30550 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30552 /* C code for the stuff we expand below.
30553 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        return x2;
   */
30566 enum machine_mode mode = GET_MODE (operand0);
30567 rtx xa, TWO52, tmp, label, one, res, mask;
30569 TWO52 = ix86_gen_TWO52 (mode);
30571 /* Temporary for holding the result, initialized to the input
30572 operand to ease control flow. */
30573 res = gen_reg_rtx (mode);
30574 emit_move_insn (res, operand1);
30576 /* xa = abs (operand1) */
30577 xa = ix86_expand_sse_fabs (res, &mask);
30579 /* if (!isless (xa, TWO52)) goto label; */
30580 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30582 /* xa = xa + TWO52 - TWO52; */
30583 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30584 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30586 /* xa = copysign (xa, operand1) */
30587 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30589 /* generate 1.0 or -1.0 */
30590 one = force_reg (mode,
30591 const_double_from_real_value (do_floor
30592 ? dconst1 : dconstm1, mode));
30594 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30595 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30596 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30597 gen_rtx_AND (mode, one, tmp)));
30598 /* We always need to subtract here to preserve signed zero. */
30599 tmp = expand_simple_binop (mode, MINUS,
30600 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30601 emit_move_insn (res, tmp);
30603 emit_label (label);
30604 LABEL_NUSES (label) = 1;
30606 emit_move_insn (operand0, res);
30609 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30612 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30614 /* C code for the stuff we expand below.
30615 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
30629 enum machine_mode mode = GET_MODE (operand0);
30630 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30632 TWO52 = ix86_gen_TWO52 (mode);
30634 /* Temporary for holding the result, initialized to the input
30635 operand to ease control flow. */
30636 res = gen_reg_rtx (mode);
30637 emit_move_insn (res, operand1);
30639 /* xa = abs (operand1) */
30640 xa = ix86_expand_sse_fabs (res, &mask);
30642 /* if (!isless (xa, TWO52)) goto label; */
30643 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30645 /* xa = (double)(long)x */
30646 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30647 expand_fix (xi, res, 0);
30648 expand_float (xa, xi, 0);
30651 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30653 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30654 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30655 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30656 gen_rtx_AND (mode, one, tmp)));
30657 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30658 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30659 emit_move_insn (res, tmp);
30661 if (HONOR_SIGNED_ZEROS (mode))
30662 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30664 emit_label (label);
30665 LABEL_NUSES (label) = 1;
30667 emit_move_insn (operand0, res);
30670 /* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  The sequence works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
30674 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30676 /* C code for the stuff we expand below.
30677 double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
30692 enum machine_mode mode = GET_MODE (operand0);
30693 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30695 TWO52 = ix86_gen_TWO52 (mode);
30697 /* Temporary for holding the result, initialized to the input
30698 operand to ease control flow. */
30699 res = gen_reg_rtx (mode);
30700 emit_move_insn (res, operand1);
30702 /* xa = abs (operand1) */
30703 xa = ix86_expand_sse_fabs (res, &mask);
30705 /* if (!isless (xa, TWO52)) goto label; */
30706 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30708 /* xa2 = xa + TWO52 - TWO52; */
30709 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30710 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30712 /* dxa = xa2 - xa; */
30713 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30715 /* generate 0.5, 1.0 and -0.5 */
30716 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30717 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);
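  /* Deriving 1.0 and -0.5 arithmetically from the single 0.5 constant
     avoids loading two further constants from the constant pool.  */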
30722 tmp = gen_reg_rtx (mode);
30723 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30724 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30725 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30726 gen_rtx_AND (mode, one, tmp)));
30727 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30728 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30729 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30730 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30731 gen_rtx_AND (mode, one, tmp)));
30732 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30734 /* res = copysign (xa2, operand1) */
30735 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30737 emit_label (label);
30738 LABEL_NUSES (label) = 1;
30740 emit_move_insn (operand0, res);
30743 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30746 ix86_expand_trunc (rtx operand0, rtx operand1)
30748 /* C code for SSE variant we expand below.
30749 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
30757 enum machine_mode mode = GET_MODE (operand0);
30758 rtx xa, xi, TWO52, label, res, mask;
30760 TWO52 = ix86_gen_TWO52 (mode);
30762 /* Temporary for holding the result, initialized to the input
30763 operand to ease control flow. */
30764 res = gen_reg_rtx (mode);
30765 emit_move_insn (res, operand1);
30767 /* xa = abs (operand1) */
30768 xa = ix86_expand_sse_fabs (res, &mask);
30770 /* if (!isless (xa, TWO52)) goto label; */
30771 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30773 /* x = (double)(long)x */
30774 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30775 expand_fix (xi, res, 0);
30776 expand_float (res, xi, 0);
30778 if (HONOR_SIGNED_ZEROS (mode))
30779 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30781 emit_label (label);
30782 LABEL_NUSES (label) = 1;
30784 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  The sequence works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
30790 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30792 enum machine_mode mode = GET_MODE (operand0);
30793 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30795 /* C code for SSE variant we expand below.
30796 double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */
30807 TWO52 = ix86_gen_TWO52 (mode);
30809 /* Temporary for holding the result, initialized to the input
30810 operand to ease control flow. */
30811 res = gen_reg_rtx (mode);
30812 emit_move_insn (res, operand1);
30814 /* xa = abs (operand1) */
30815 xa = ix86_expand_sse_fabs (res, &smask);
30817 /* if (!isless (xa, TWO52)) goto label; */
30818 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30820 /* res = xa + TWO52 - TWO52; */
30821 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30822 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30823 emit_move_insn (res, tmp);
30826 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30828 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30829 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30830 emit_insn (gen_rtx_SET (VOIDmode, mask,
30831 gen_rtx_AND (mode, mask, one)));
30832 tmp = expand_simple_binop (mode, MINUS,
30833 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30834 emit_move_insn (res, tmp);
30836 /* res = copysign (res, operand1) */
30837 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30839 emit_label (label);
30840 LABEL_NUSES (label) = 1;
30842 emit_move_insn (operand0, res);
30845 /* Expand SSE sequence for computing round from OPERAND1 storing
30848 ix86_expand_round (rtx operand0, rtx operand1)
30850 /* C code for the stuff we're doing below:
30851 double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
30857 enum machine_mode mode = GET_MODE (operand0);
30858 rtx res, TWO52, xa, label, xi, half, mask;
30859 const struct real_format *fmt;
30860 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30862 /* Temporary for holding the result, initialized to the input
30863 operand to ease control flow. */
30864 res = gen_reg_rtx (mode);
30865 emit_move_insn (res, operand1);
30867 TWO52 = ix86_gen_TWO52 (mode);
30868 xa = ix86_expand_sse_fabs (res, &mask);
30869 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30871 /* load nextafter (0.5, 0.0) */
30872 fmt = REAL_MODE_FORMAT (mode);
30873 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30874 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30876 /* xa = xa + 0.5 */
30877 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30878 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30880 /* xa = (double)(int64_t)xa */
30881 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30882 expand_fix (xi, xa, 0);
30883 expand_float (xa, xi, 0);
30885 /* res = copysign (xa, operand1) */
30886 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30888 emit_label (label);
30889 LABEL_NUSES (label) = 1;
30891 emit_move_insn (operand0, res);
30895 /* Table of valid machine attributes. */
30896 static const struct attribute_spec ix86_attribute_table[] =
30898 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30899 /* Stdcall attribute says callee is responsible for popping arguments
30900 if they are not variable. */
30901 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30902 /* Fastcall attribute says callee is responsible for popping arguments
30903 if they are not variable. */
30904 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30905 /* Thiscall attribute says callee is responsible for popping arguments
30906 if they are not variable. */
30907 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30908 /* Cdecl attribute says the callee is a normal C declaration */
30909 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30910 /* Regparm attribute specifies how many integer arguments are to be
30911 passed in registers. */
30912 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30913 /* Sseregparm attribute says we are using x86_64 calling conventions
30914 for FP arguments. */
30915 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30916 /* force_align_arg_pointer says this function realigns the stack at entry. */
30917 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30918 false, true, true, ix86_handle_cconv_attribute },
30919 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30920 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30921 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30922 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30924 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30925 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30926 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30927 SUBTARGET_ATTRIBUTE_TABLE,
30929 /* ms_abi and sysv_abi calling convention function attributes. */
30930 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30931 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30932 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30934 { NULL, 0, 0, false, false, false, NULL }
30937 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30939 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30940 tree vectype ATTRIBUTE_UNUSED,
30941 int misalign ATTRIBUTE_UNUSED)
30943 switch (type_of_cost)
30946 return ix86_cost->scalar_stmt_cost;
30949 return ix86_cost->scalar_load_cost;
30952 return ix86_cost->scalar_store_cost;
30955 return ix86_cost->vec_stmt_cost;
30958 return ix86_cost->vec_align_load_cost;
30961 return ix86_cost->vec_store_cost;
30963 case vec_to_scalar:
30964 return ix86_cost->vec_to_scalar_cost;
30966 case scalar_to_vec:
30967 return ix86_cost->scalar_to_vec_cost;
30969 case unaligned_load:
30970 case unaligned_store:
30971 return ix86_cost->vec_unalign_load_cost;
30973 case cond_branch_taken:
30974 return ix86_cost->cond_taken_branch_cost;
30976 case cond_branch_not_taken:
30977 return ix86_cost->cond_not_taken_branch_cost;
      default:
        gcc_unreachable ();
30988 /* Implement targetm.vectorize.builtin_vec_perm. */
30991 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30993 tree itype = TREE_TYPE (vec_type);
30994 bool u = TYPE_UNSIGNED (itype);
30995 enum machine_mode vmode = TYPE_MODE (vec_type);
30996 enum ix86_builtins fcode;
30997 bool ok = TARGET_SSE2;
31003 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
31006 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
31008 itype = ix86_get_builtin_type (IX86_BT_DI);
31013 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
31017 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
31019 itype = ix86_get_builtin_type (IX86_BT_SI);
31023 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
31026 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
31029 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
31032 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
31042 *mask_type = itype;
31043 return ix86_builtins[(int) fcode];
31046 /* Return a vector mode with twice as many elements as VMODE. */
31047 /* ??? Consider moving this to a table generated by genmodes.c. */
31049 static enum machine_mode
31050 doublesize_vector_mode (enum machine_mode vmode)
31054 case V2SFmode: return V4SFmode;
31055 case V1DImode: return V2DImode;
31056 case V2SImode: return V4SImode;
31057 case V4HImode: return V8HImode;
31058 case V8QImode: return V16QImode;
31060 case V2DFmode: return V4DFmode;
31061 case V4SFmode: return V8SFmode;
31062 case V2DImode: return V4DImode;
31063 case V4SImode: return V8SImode;
31064 case V8HImode: return V16HImode;
31065 case V16QImode: return V32QImode;
31067 case V4DFmode: return V8DFmode;
31068 case V8SFmode: return V16SFmode;
31069 case V4DImode: return V8DImode;
31070 case V8SImode: return V16SImode;
31071 case V16HImode: return V32HImode;
31072 case V32QImode: return V64QImode;
    default:
      gcc_unreachable ();
31079 /* Construct (set target (vec_select op0 (parallel perm))) and
31080 return true if that's a valid instruction in the active ISA. */
31083 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
31085 rtx rperm[MAX_VECT_LEN], x;
31088 for (i = 0; i < nelt; ++i)
31089 rperm[i] = GEN_INT (perm[i]);
31091 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
31092 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
31093 x = gen_rtx_SET (VOIDmode, target, x);
  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
31104 /* Similar, but generate a vec_concat from op0 and op1 as well. */
31107 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
31108 const unsigned char *perm, unsigned nelt)
31110 enum machine_mode v2mode;
31113 v2mode = doublesize_vector_mode (GET_MODE (op0));
31114 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
31115 return expand_vselect (target, x, perm, nelt);
31118 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31119 in terms of blendp[sd] / pblendw / pblendvb. */
31122 expand_vec_perm_blend (struct expand_vec_perm_d *d)
31124 enum machine_mode vmode = d->vmode;
31125 unsigned i, mask, nelt = d->nelt;
31126 rtx target, op0, op1, x;
  if (!TARGET_SSE4_1 || d->op0 == d->op1)
    return false;
  if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
    return false;
31133 /* This is a blend, not a permute. Elements must stay in their
31134 respective lanes. */
31135 for (i = 0; i < nelt; ++i)
31137 unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
31145 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31146 decision should be extracted elsewhere, so that we only try that
31147 sequence once all budget==3 options have been tried. */
31149 /* For bytes, see if bytes move in pairs so we can use pblendw with
31150 an immediate argument, rather than pblendvb with a vector argument. */
31151 if (vmode == V16QImode)
31153 bool pblendw_ok = true;
31154 for (i = 0; i < 16 && pblendw_ok; i += 2)
31155 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
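      /* For instance, a byte permutation like { 0 1 18 19 4 5 ... }
	 moves bytes in aligned pairs, so the whole selection can be
	 re-expressed as a word blend with an immediate mask, while any
	 permutation splitting a pair needs pblendvb.  */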
31159 rtx rperm[16], vperm;
31161 for (i = 0; i < nelt; ++i)
31162 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31164 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31165 vperm = force_reg (V16QImode, vperm);
31167 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31172 target = d->target;
31184 for (i = 0; i < nelt; ++i)
31185 mask |= (d->perm[i] >= nelt) << i;
31189 for (i = 0; i < 2; ++i)
31190 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31194 for (i = 0; i < 4; ++i)
31195 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31199 for (i = 0; i < 8; ++i)
31200 mask |= (d->perm[i * 2] >= 16) << i;
31204 target = gen_lowpart (vmode, target);
31205 op0 = gen_lowpart (vmode, op0);
31206 op1 = gen_lowpart (vmode, op1);
31210 gcc_unreachable ();
31213 /* This matches five different patterns with the different modes. */
31214 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31215 x = gen_rtx_SET (VOIDmode, target, x);
31221 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31222 in terms of the variable form of vpermilps.
31224 Note that we will have already failed the immediate input vpermilps,
31225 which requires that the high and low part shuffle be identical; the
31226 variable form doesn't require that. */
31229 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31231 rtx rperm[8], vperm;
  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;
31237 /* We can only permute within the 128-bit lane. */
31238 for (i = 0; i < 8; ++i)
31240 unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
31248 for (i = 0; i < 8; ++i)
31250 unsigned e = d->perm[i];
31252 /* Within each 128-bit lane, the elements of op0 are numbered
31253 from 0 and the elements of op1 are numbered from 4. */
31259 rperm[i] = GEN_INT (e);
31262 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31263 vperm = force_reg (V8SImode, vperm);
31264 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31269 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31270 in terms of pshufb or vpperm. */
31273 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31275 unsigned i, nelt, eltsz;
31276 rtx rperm[16], vperm, target, op0, op1;
  if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
    return false;
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;
31287 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31289 for (i = 0; i < nelt; ++i)
31291 unsigned j, e = d->perm[i];
31292 for (j = 0; j < eltsz; ++j)
31293 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31296 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31297 vperm = force_reg (V16QImode, vperm);
31299 target = gen_lowpart (V16QImode, d->target);
31300 op0 = gen_lowpart (V16QImode, d->op0);
31301 if (d->op0 == d->op1)
31302 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31305 op1 = gen_lowpart (V16QImode, d->op1);
31306 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
31312 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31313 in a single instruction. */
31316 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31318 unsigned i, nelt = d->nelt;
31319 unsigned char perm2[MAX_VECT_LEN];
31321 /* Check plain VEC_SELECT first, because AVX has instructions that could
31322 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31323 input where SEL+CONCAT may not. */
31324 if (d->op0 == d->op1)
31326 int mask = nelt - 1;
31328 for (i = 0; i < nelt; i++)
31329 perm2[i] = d->perm[i] & mask;
31331 if (expand_vselect (d->target, d->op0, perm2, nelt))
31334 /* There are plenty of patterns in sse.md that are written for
31335 SEL+CONCAT and are not replicated for a single op. Perhaps
31336 that should be changed, to avoid the nastiness here. */
31338 /* Recognize interleave style patterns, which means incrementing
31339 every other permutation operand. */
31340 for (i = 0; i < nelt; i += 2)
31342 perm2[i] = d->perm[i] & mask;
31343 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31345 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31348 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31351 for (i = 0; i < nelt; i += 4)
31353 perm2[i + 0] = d->perm[i + 0] & mask;
31354 perm2[i + 1] = d->perm[i + 1] & mask;
31355 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31356 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31359 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31364 /* Finally, try the fully general two operand permute. */
31365 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31368 /* Recognize interleave style patterns with reversed operands. */
31369 if (d->op0 != d->op1)
31371 for (i = 0; i < nelt; ++i)
31373 unsigned e = d->perm[i];
31381 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31385 /* Try the SSE4.1 blend variable merge instructions. */
31386 if (expand_vec_perm_blend (d))
31389 /* Try one of the AVX vpermil variable permutations. */
31390 if (expand_vec_perm_vpermil (d))
31393 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31394 if (expand_vec_perm_pshufb (d))
31400 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31401 in terms of a pair of pshuflw + pshufhw instructions. */
31404 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31406 unsigned char perm2[MAX_VECT_LEN];
  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;
31413 /* The two permutations only operate in 64-bit lanes. */
31414 for (i = 0; i < 4; ++i)
31415 if (d->perm[i] >= 4)
31417 for (i = 4; i < 8; ++i)
31418 if (d->perm[i] < 4)
31424 /* Emit the pshuflw. */
31425 memcpy (perm2, d->perm, 4);
31426 for (i = 4; i < 8; ++i)
31428 ok = expand_vselect (d->target, d->op0, perm2, 8);
31431 /* Emit the pshufhw. */
31432 memcpy (perm2 + 4, d->perm + 4, 4);
31433 for (i = 0; i < 4; ++i)
31435 ok = expand_vselect (d->target, d->target, perm2, 8);
31441 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31442 the permutation using the SSSE3 palignr instruction. This succeeds
31443 when all of the elements in PERM fit within one vector and we merely
31444 need to shift them down so that a single vector permutation has a
31445 chance to succeed. */
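/* For example, for V8HImode with perm = { 3 4 5 6 7 8 9 10 } every
   index falls in the window [3, 10] of the 16-element concatenation,
   so palignr can shift the operand pair down by 3 elements and the
   leftover single-vector permutation is simply the identity.  */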
31448 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31450 unsigned i, nelt = d->nelt;
31455 /* Even with AVX, palignr only operates on 128-bit vectors. */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
31459 min = nelt, max = 0;
31460 for (i = 0; i < nelt; ++i)
31462 unsigned e = d->perm[i];
  if (min == 0 || max - min >= nelt)
    return false;
31471 /* Given that we have SSSE3, we know we'll be able to implement the
31472 single operand permutation after the palignr with pshufb. */
31476 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31477 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31478 gen_lowpart (TImode, d->op1),
31479 gen_lowpart (TImode, d->op0), shift));
31481 d->op0 = d->op1 = d->target;
31484 for (i = 0; i < nelt; ++i)
31486 unsigned e = d->perm[i] - min;
31492 /* Test for the degenerate case where the alignment by itself
31493 produces the desired permutation. */
31497 ok = expand_vec_perm_1 (d);
31503 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31504 a two vector permutation into a single vector permutation by using
31505 an interleave operation to merge the vectors. */
31508 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31510 struct expand_vec_perm_d dremap, dfinal;
31511 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31512 unsigned contents, h1, h2, h3, h4;
31513 unsigned char remap[2 * MAX_VECT_LEN];
  if (d->op0 == d->op1)
    return false;
31520 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31521 lanes. We can use similar techniques with the vperm2f128 instruction,
31522 but it requires slightly different logic. */
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;
31526 /* Examine from whence the elements come. */
31528 for (i = 0; i < nelt; ++i)
31529 contents |= 1u << d->perm[i];
31531 /* Split the two input vectors into 4 halves. */
31532 h1 = (1u << nelt2) - 1;
31537 memset (remap, 0xff, sizeof (remap));
  /* If the elements come from the two low halves, use interleave low;
     similarly for interleave high.  If the elements are from
     mismatched halves, we can use shufps for V4SF/V4SI or do a DImode
     shuffle.  */
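  /* E.g. for V4SFmode with perm = { 0 1 4 5 }, CONTENTS is 0x33 and is
     covered by h1 | h3 (both low halves): interleave-low produces
     { 0 4 1 5 }, after which the remap table turns the request into
     the single-operand permutation { 0 2 1 3 }.  */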
31543 if ((contents & (h1 | h3)) == contents)
31545 for (i = 0; i < nelt2; ++i)
31548 remap[i + nelt] = i * 2 + 1;
31549 dremap.perm[i * 2] = i;
31550 dremap.perm[i * 2 + 1] = i + nelt;
31553 else if ((contents & (h2 | h4)) == contents)
31555 for (i = 0; i < nelt2; ++i)
31557 remap[i + nelt2] = i * 2;
31558 remap[i + nelt + nelt2] = i * 2 + 1;
31559 dremap.perm[i * 2] = i + nelt2;
31560 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31563 else if ((contents & (h1 | h4)) == contents)
31565 for (i = 0; i < nelt2; ++i)
31568 remap[i + nelt + nelt2] = i + nelt2;
31569 dremap.perm[i] = i;
31570 dremap.perm[i + nelt2] = i + nelt + nelt2;
31574 dremap.vmode = V2DImode;
31576 dremap.perm[0] = 0;
31577 dremap.perm[1] = 3;
31580 else if ((contents & (h2 | h3)) == contents)
31582 for (i = 0; i < nelt2; ++i)
31584 remap[i + nelt2] = i;
31585 remap[i + nelt] = i + nelt2;
31586 dremap.perm[i] = i + nelt2;
31587 dremap.perm[i + nelt2] = i + nelt;
31591 dremap.vmode = V2DImode;
31593 dremap.perm[0] = 1;
31594 dremap.perm[1] = 2;
31600 /* Use the remapping array set up above to move the elements from their
31601 swizzled locations into their final destinations. */
31603 for (i = 0; i < nelt; ++i)
31605 unsigned e = remap[d->perm[i]];
31606 gcc_assert (e < nelt);
31607 dfinal.perm[i] = e;
31609 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31610 dfinal.op1 = dfinal.op0;
31611 dremap.target = dfinal.op0;
31613 /* Test if the final remap can be done with a single insn. For V4SFmode or
31614 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31616 ok = expand_vec_perm_1 (&dfinal);
31617 seq = get_insns ();
31623 if (dremap.vmode != dfinal.vmode)
31625 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31626 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31627 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31630 ok = expand_vec_perm_1 (&dremap);
31637 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31638 permutation with two pshufb insns and an ior. We should have already
31639 failed all two instruction sequences. */
31642 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31644 rtx rperm[2][16], vperm, l, h, op, m128;
31645 unsigned int i, nelt, eltsz;
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
31649 gcc_assert (d->op0 != d->op1);
31652 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31654 /* Generate two permutation masks. If the required element is within
31655 the given vector it is shuffled into the proper lane. If the required
31656 element is in the other vector, force a zero into the lane by setting
31657 bit 7 in the permutation mask. */
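  /* pshufb writes a zero to any destination byte whose control byte
     has bit 7 set, so each shuffled vector contributes only its own
     elements and the final ior merges the two without a blend.  */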
31658 m128 = GEN_INT (-128);
31659 for (i = 0; i < nelt; ++i)
31661 unsigned j, e = d->perm[i];
31662 unsigned which = (e >= nelt);
31666 for (j = 0; j < eltsz; ++j)
31668 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31669 rperm[1-which][i*eltsz + j] = m128;
31673 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31674 vperm = force_reg (V16QImode, vperm);
31676 l = gen_reg_rtx (V16QImode);
31677 op = gen_lowpart (V16QImode, d->op0);
31678 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31680 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31681 vperm = force_reg (V16QImode, vperm);
31683 h = gen_reg_rtx (V16QImode);
31684 op = gen_lowpart (V16QImode, d->op1);
31685 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31687 op = gen_lowpart (V16QImode, d->target);
31688 emit_insn (gen_iorv16qi3 (op, l, h));
31693 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31694 and extract-odd permutations. */
31697 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31704 t1 = gen_reg_rtx (V4DFmode);
31705 t2 = gen_reg_rtx (V4DFmode);
31707 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31708 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31709 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31711 /* Now an unpck[lh]pd will produce the result required. */
31713 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31715 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31721 int mask = odd ? 0xdd : 0x88;
31723 t1 = gen_reg_rtx (V8SFmode);
31724 t2 = gen_reg_rtx (V8SFmode);
31725 t3 = gen_reg_rtx (V8SFmode);
31727 /* Shuffle within the 128-bit lanes to produce:
31728 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
31729 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
31732 /* Shuffle the lanes around to produce:
31733 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
31734 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
31737 /* Shuffle within the 128-bit lanes to produce:
31738 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
31739 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
31741 /* Shuffle within the 128-bit lanes to produce:
31742 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
31743 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
31745 /* Shuffle the lanes around to produce:
31746 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
31747 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
31756 /* These are always directly implementable by expand_vec_perm_1. */
31757 gcc_unreachable ();
31761 return expand_vec_perm_pshufb2 (d);
31764 /* We need 2*log2(N)-1 operations to achieve odd/even
31765 with interleave. */
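      /* For V8HImode that is five insns: two rounds of unpacks (four
	 insns) regroup the elements so that a final unpack picks out
	 exactly the even or the odd positions.  */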
31766 t1 = gen_reg_rtx (V8HImode);
31767 t2 = gen_reg_rtx (V8HImode);
31768 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31769 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31770 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31771 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31773 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31775 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31782 return expand_vec_perm_pshufb2 (d);
31785 t1 = gen_reg_rtx (V16QImode);
31786 t2 = gen_reg_rtx (V16QImode);
31787 t3 = gen_reg_rtx (V16QImode);
31788 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31789 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31790 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31791 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31792 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31793 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31795 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31797 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31803 gcc_unreachable ();
31809 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31810 extract-even and extract-odd permutations. */
31813 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31815 unsigned i, odd, nelt = d->nelt;
  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;
31821 for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;
31825 return expand_vec_perm_even_odd_1 (d, odd);
31828 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31829 permutations. We assume that expand_vec_perm_1 has already failed. */
31832 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31834 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31835 enum machine_mode vmode = d->vmode;
31836 unsigned char perm2[4];
31844 /* These are special-cased in sse.md so that we can optionally
31845 use the vbroadcast instruction. They expand to two insns
31846 if the input happens to be in a register. */
31847 gcc_unreachable ();
31853 /* These are always implementable using standard shuffle patterns. */
31854 gcc_unreachable ();
31858 /* These can be implemented via interleave. We save one insn by
31859 stopping once we have promoted to V4SImode and then use pshufd. */
31862 optab otab = vec_interleave_low_optab;
31866 otab = vec_interleave_high_optab;
31871 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31872 vmode = get_mode_wider_vector (vmode);
31873 op0 = gen_lowpart (vmode, op0);
31875 while (vmode != V4SImode);
31877 memset (perm2, elt, 4);
31878 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31883 gcc_unreachable ();
31887 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31888 broadcast permutations. */
31891 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31893 unsigned i, elt, nelt = d->nelt;
  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;
31903 return expand_vec_perm_broadcast_1 (d);
31906 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31907 With all of the interface bits taken care of, perform the expansion
31908 in D and return true on success. */
31911 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31913 /* Try a single instruction expansion. */
31914 if (expand_vec_perm_1 (d))
31917 /* Try sequences of two instructions. */
31919 if (expand_vec_perm_pshuflw_pshufhw (d))
31922 if (expand_vec_perm_palignr (d))
31925 if (expand_vec_perm_interleave2 (d))
31928 if (expand_vec_perm_broadcast (d))
31931 /* Try sequences of three instructions. */
31933 if (expand_vec_perm_pshufb2 (d))
31936 /* ??? Look for narrow permutations whose element orderings would
31937 allow the promotion to a wider mode. */
31939 /* ??? Look for sequences of interleave or a wider permute that place
31940 the data into the correct lanes for a half-vector shuffle like
31941 pshuf[lh]w or vpermilps. */
31943 /* ??? Look for sequences of interleave that produce the desired results.
31944 The combinatorics of punpck[lh] get pretty ugly... */
31946 if (expand_vec_perm_even_odd (d))
31952 /* Extract the values from the vector CST into the permutation array in D.
31953 Return 0 on error, 1 if all values from the permutation come from the
31954 first vector, 2 if all values from the second vector, and 3 otherwise. */
31957 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31959 tree list = TREE_VECTOR_CST_ELTS (cst);
31960 unsigned i, nelt = d->nelt;
31963 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31965 unsigned HOST_WIDE_INT e;
      if (!host_integerp (TREE_VALUE (list), 1))
	return 0;
31969 e = tree_low_cst (TREE_VALUE (list), 1);
31973 ret |= (e < nelt ? 1 : 2);
31976 gcc_assert (list == NULL);
31978 /* For all elements from second vector, fold the elements to first. */
31980 for (i = 0; i < nelt; ++i)
31981 d->perm[i] -= nelt;
31987 ix86_expand_vec_perm_builtin (tree exp)
31989 struct expand_vec_perm_d d;
31990 tree arg0, arg1, arg2;
31992 arg0 = CALL_EXPR_ARG (exp, 0);
31993 arg1 = CALL_EXPR_ARG (exp, 1);
31994 arg2 = CALL_EXPR_ARG (exp, 2);
31996 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31997 d.nelt = GET_MODE_NUNITS (d.vmode);
31998 d.testing_p = false;
31999 gcc_assert (VECTOR_MODE_P (d.vmode));
32001 if (TREE_CODE (arg2) != VECTOR_CST)
32003 error_at (EXPR_LOCATION (exp),
32004 "vector permutation requires vector constant");
32008 switch (extract_vec_perm_cst (&d, arg2))
32014 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
32018 if (!operand_equal_p (arg0, arg1, 0))
32020 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
32021 d.op0 = force_reg (d.vmode, d.op0);
32022 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
32023 d.op1 = force_reg (d.vmode, d.op1);
32027 /* The elements of PERM do not suggest that only the first operand
32028 is used, but both operands are identical. Allow easier matching
     of the permutation by folding the permutation into the single
     input vector.  */
32032 unsigned i, nelt = d.nelt;
32033 for (i = 0; i < nelt; ++i)
32034 if (d.perm[i] >= nelt)
32040 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
32041 d.op0 = force_reg (d.vmode, d.op0);
32046 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
32047 d.op0 = force_reg (d.vmode, d.op0);
32052 d.target = gen_reg_rtx (d.vmode);
32053 if (ix86_expand_vec_perm_builtin_1 (&d))
  /* For compiler generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin the user has access to, don't abort.  */
32062 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
32065 sorry ("vector permutation (%d %d %d %d)",
32066 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
32069 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
32070 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32071 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
32074 sorry ("vector permutation "
32075 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
32076 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32077 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
32078 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
32079 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
32082 gcc_unreachable ();
32085 return CONST0_RTX (d.vmode);
32088 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
32091 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
32093 struct expand_vec_perm_d d;
32097 d.vmode = TYPE_MODE (vec_type);
32098 d.nelt = GET_MODE_NUNITS (d.vmode);
32099 d.testing_p = true;
32101 /* Given sufficient ISA support we can just return true here
32102 for selected vector modes. */
32103 if (GET_MODE_SIZE (d.vmode) == 16)
32105 /* All implementable with a single vpperm insn. */
32108 /* All implementable with 2 pshufb + 1 ior. */
32111 /* All implementable with shufpd or unpck[lh]pd. */
32116 vec_mask = extract_vec_perm_cst (&d, mask);
  /* This hook cannot be called in response to something that the
     user does (unlike the builtin expander), so we should never see
     an error generated from the extract.  */
32121 gcc_assert (vec_mask > 0 && vec_mask <= 3);
32122 one_vec = (vec_mask != 3);
32124 /* Implementable with shufps or pshufd. */
32125 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
32128 /* Otherwise we have to go through the motions and see if we can
32129 figure out how to generate the requested permutation. */
32130 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32131 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32133 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32136 ret = ix86_expand_vec_perm_builtin_1 (&d);
32143 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32145 struct expand_vec_perm_d d;
32151 d.vmode = GET_MODE (targ);
32152 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32153 d.testing_p = false;
32155 for (i = 0; i < nelt; ++i)
32156 d.perm[i] = i * 2 + odd;
32158 /* We'll either be able to implement the permutation directly... */
32159 if (expand_vec_perm_1 (&d))
32162 /* ... or we use the special-case patterns. */
32163 expand_vec_perm_even_odd_1 (&d, odd);
/* This function returns the calling-ABI-specific va_list type node.
   It returns the FNDECL-specific va_list type.  */
32170 ix86_fn_abi_va_list (tree fndecl)
32173 return va_list_type_node;
32174 gcc_assert (fndecl != NULL_TREE);
32176 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32177 return ms_va_list_type_node;
32179 return sysv_va_list_type_node;
32182 /* Returns the canonical va_list type specified by TYPE. If there
   is no valid TYPE provided, it returns NULL_TREE.  */
32186 ix86_canonical_va_list_type (tree type)
32190 /* Resolve references and pointers to va_list type. */
32191 if (TREE_CODE (type) == MEM_REF)
32192 type = TREE_TYPE (type);
32193 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32194 type = TREE_TYPE (type);
32195 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32196 type = TREE_TYPE (type);
32200 wtype = va_list_type_node;
32201 gcc_assert (wtype != NULL_TREE);
32203 if (TREE_CODE (wtype) == ARRAY_TYPE)
32205 /* If va_list is an array type, the argument may have decayed
32206 to a pointer type, e.g. by being passed to another function.
32207 In that case, unwrap both types so that we can compare the
32208 underlying records. */
32209 if (TREE_CODE (htype) == ARRAY_TYPE
32210 || POINTER_TYPE_P (htype))
32212 wtype = TREE_TYPE (wtype);
32213 htype = TREE_TYPE (htype);
32216 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32217 return va_list_type_node;
32218 wtype = sysv_va_list_type_node;
32219 gcc_assert (wtype != NULL_TREE);
32221 if (TREE_CODE (wtype) == ARRAY_TYPE)
32223 /* If va_list is an array type, the argument may have decayed
32224 to a pointer type, e.g. by being passed to another function.
32225 In that case, unwrap both types so that we can compare the
32226 underlying records. */
32227 if (TREE_CODE (htype) == ARRAY_TYPE
32228 || POINTER_TYPE_P (htype))
32230 wtype = TREE_TYPE (wtype);
32231 htype = TREE_TYPE (htype);
32234 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32235 return sysv_va_list_type_node;
32236 wtype = ms_va_list_type_node;
32237 gcc_assert (wtype != NULL_TREE);
32239 if (TREE_CODE (wtype) == ARRAY_TYPE)
32241 /* If va_list is an array type, the argument may have decayed
32242 to a pointer type, e.g. by being passed to another function.
32243 In that case, unwrap both types so that we can compare the
32244 underlying records. */
32245 if (TREE_CODE (htype) == ARRAY_TYPE
32246 || POINTER_TYPE_P (htype))
32248 wtype = TREE_TYPE (wtype);
32249 htype = TREE_TYPE (htype);
32252 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32253 return ms_va_list_type_node;
32256 return std_canonical_va_list_type (type);
32259 /* Iterate through the target-specific builtin types for va_list.
32260 IDX denotes the iterator, *PTREE is set to the result type of
32261 the va_list builtin, and *PNAME to its internal type.
32262 Returns zero if there is no element for this index, otherwise
32263 IDX should be increased upon the next call.
32264 Note, do not iterate a base builtin's name like __builtin_va_list.
32265 Used from c_common_nodes_and_builtins. */
32268 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32278 *ptree = ms_va_list_type_node;
32279 *pname = "__builtin_ms_va_list";
32283 *ptree = sysv_va_list_type_node;
32284 *pname = "__builtin_sysv_va_list";
32292 #undef TARGET_SCHED_DISPATCH
32293 #define TARGET_SCHED_DISPATCH has_dispatch
32294 #undef TARGET_SCHED_DISPATCH_DO
32295 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32297 /* The size of the dispatch window is the total number of bytes of
32298 object code allowed in a window. */
32299 #define DISPATCH_WINDOW_SIZE 16
32301 /* Number of dispatch windows considered for scheduling. */
32302 #define MAX_DISPATCH_WINDOWS 3
32304 /* Maximum number of instructions in a window. */
32307 /* Maximum number of immediate operands in a window. */
32310 /* Maximum number of immediate bits allowed in a window. */
32311 #define MAX_IMM_SIZE 128
32313 /* Maximum number of 32 bit immediates allowed in a window. */
32314 #define MAX_IMM_32 4
32316 /* Maximum number of 64 bit immediates allowed in a window. */
32317 #define MAX_IMM_64 2
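/* These limits are mutually consistent: MAX_IMM_32 32-bit immediates,
   MAX_IMM_64 64-bit immediates and MAX_IMM_SIZE bits all describe the
   same budget of immediate data per window (4 * 32 == 2 * 64 == 128).  */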
32319 /* Maximum total of loads or prefetches allowed in a window. */
32322 /* Maximum total of stores allowed in a window. */
32323 #define MAX_STORE 1

#define BIG 100
32329 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
32330 enum dispatch_group {
32345 /* Number of allowable groups in a dispatch window. It is an array
32346    indexed by the dispatch_group enum.  100 is used as a big number
32347    because the number of these kinds of operations has no effect on the
32348    dispatch window, but we need them for other reasons in a window.  */
32350 static unsigned int num_allowable_groups[disp_last] = {
32351 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32354 char group_name[disp_last + 1][16] = {
32355 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32356 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32357 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
32360 /* Instruction path. */
32363 path_single, /* Single micro op. */
32364 path_double, /* Double micro op. */
32365   path_multi, /* Instructions with more than 2 micro ops.  */
32369 /* sched_insn_info defines a window to the instructions scheduled in
32370 the basic block. It contains a pointer to the insn_info table and
32371 the instruction scheduled.
32373    Windows are allocated for each basic block and are linked with each other.  */
32375 typedef struct sched_insn_info_s {
32377 enum dispatch_group group;
32378 enum insn_path path;
32383 /* Linked list of dispatch windows.  This is a two-way list of
32384 dispatch windows of a basic block. It contains information about
32385 the number of uops in the window and the total number of
32386    instructions and of bytes in the object code for this dispatch window.  */
32388 typedef struct dispatch_windows_s {
32389   int num_insn;            /* Number of insns in the window.  */
32390 int num_uops; /* Number of uops in the window. */
32391 int window_size; /* Number of bytes in the window. */
32392   int window_num;          /* Window number, either 0 or 1.  */
32393 int num_imm; /* Number of immediates in an insn. */
32394 int num_imm_32; /* Number of 32 bit immediates in an insn. */
32395 int num_imm_64; /* Number of 64 bit immediates in an insn. */
32396 int imm_size; /* Total immediates in the window. */
32397 int num_loads; /* Total memory loads in the window. */
32398 int num_stores; /* Total memory stores in the window. */
32399 int violation; /* Violation exists in window. */
32400 sched_insn_info *window; /* Pointer to the window. */
32401 struct dispatch_windows_s *next;
32402 struct dispatch_windows_s *prev;
32403 } dispatch_windows;
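/* Only two of these structures are ever live (dispatch_window_list and
   dispatch_window_list1 below); they are recycled rather than freed,
   with init_window resetting the counters whenever a window is reused.  */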
32405 /* Immediate values used in an insn.  */
32406 typedef struct imm_info_s
32413 static dispatch_windows *dispatch_window_list;
32414 static dispatch_windows *dispatch_window_list1;
32416 /* Get dispatch group of insn. */
32418 static enum dispatch_group
32419 get_mem_group (rtx insn)
32421 enum attr_memory memory;
32423 if (INSN_CODE (insn) < 0)
32424 return disp_no_group;
32425 memory = get_attr_memory (insn);
32426 if (memory == MEMORY_STORE)
32429 if (memory == MEMORY_LOAD)
32432 if (memory == MEMORY_BOTH)
32433 return disp_load_store;
32435 return disp_no_group;
32438 /* Return true if insn is a compare instruction. */
32443 enum attr_type type;
32445 type = get_attr_type (insn);
32446 return (type == TYPE_TEST
32447 || type == TYPE_ICMP
32448 || type == TYPE_FCMP
32449 || GET_CODE (PATTERN (insn)) == COMPARE);
32452 /* Return true if a dispatch violation was encountered.  */
32455 dispatch_violation (void)
32457 if (dispatch_window_list->next)
32458 return dispatch_window_list->next->violation;
32459 return dispatch_window_list->violation;
32462 /* Return true if insn is a branch instruction. */
32465 is_branch (rtx insn)
32467 return (CALL_P (insn) || JUMP_P (insn));
32470 /* Return true if insn is a prefetch instruction. */
32473 is_prefetch (rtx insn)
32475 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
32478 /* This function initializes a dispatch window and the list container holding a
32479 pointer to the window. */
32482 init_window (int window_num)
32485 dispatch_windows *new_list;
32487 if (window_num == 0)
32488 new_list = dispatch_window_list;
32490 new_list = dispatch_window_list1;
32492 new_list->num_insn = 0;
32493 new_list->num_uops = 0;
32494 new_list->window_size = 0;
32495 new_list->next = NULL;
32496 new_list->prev = NULL;
32497 new_list->window_num = window_num;
32498 new_list->num_imm = 0;
32499 new_list->num_imm_32 = 0;
32500 new_list->num_imm_64 = 0;
32501 new_list->imm_size = 0;
32502 new_list->num_loads = 0;
32503 new_list->num_stores = 0;
32504 new_list->violation = false;
32506 for (i = 0; i < MAX_INSN; i++)
32508 new_list->window[i].insn = NULL;
32509 new_list->window[i].group = disp_no_group;
32510 new_list->window[i].path = no_path;
32511 new_list->window[i].byte_len = 0;
32512 new_list->window[i].imm_bytes = 0;
32517 /* This function allocates and initializes a dispatch window and the
32518 list container holding a pointer to the window. */
32520 static dispatch_windows *
32521 allocate_window (void)
32523 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32524 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32529 /* This routine initializes the dispatch scheduling information. It
32530 initiates building dispatch scheduler tables and constructs the
32531 first dispatch window. */
32534 init_dispatch_sched (void)
32536 /* Allocate a dispatch list and a window. */
32537 dispatch_window_list = allocate_window ();
32538 dispatch_window_list1 = allocate_window ();
32543 /* This function returns true if a branch is detected. End of a basic block
32544    does not have to be a branch, but here we assume only branches end a basic block.  */
32548 is_end_basic_block (enum dispatch_group group)
32550 return group == disp_branch;
32553 /* This function is called when the end of a window processing is reached. */
32556 process_end_window (void)
32558 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32559 if (dispatch_window_list->next)
32561 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32562 gcc_assert (dispatch_window_list->window_size
32563 + dispatch_window_list1->window_size <= 48);
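  /* The 48-byte bound is MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE
     (3 * 16): the two scheduling windows may never model more
     object-code bytes than the three 16-byte dispatch windows hold.  */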
32569 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32570 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32571 for 48 bytes of instructions. Note that these windows are not dispatch
32572    windows whose sizes are DISPATCH_WINDOW_SIZE.  */
32574 static dispatch_windows *
32575 allocate_next_window (int window_num)
32577 if (window_num == 0)
32579 if (dispatch_window_list->next)
32582 return dispatch_window_list;
32585 dispatch_window_list->next = dispatch_window_list1;
32586 dispatch_window_list1->prev = dispatch_window_list;
32588 return dispatch_window_list1;
32591 /* Increment the number of immediate operands of an instruction. */
32594 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32599   switch (GET_CODE (*in_rtx))
32604 (imm_values->imm)++;
32605 if (x86_64_immediate_operand (*in_rtx, SImode))
32606 (imm_values->imm32)++;
32608 (imm_values->imm64)++;
32612 (imm_values->imm)++;
32613 (imm_values->imm64)++;
32617 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32619 (imm_values->imm)++;
32620 (imm_values->imm32)++;
32631 /* Compute number of immediate operands of an instruction. */
32634 find_constant (rtx in_rtx, imm_info *imm_values)
32636 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32637 (rtx_function) find_constant_1, (void *) imm_values);
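/* Note that for_each_rtx walks every sub-rtx of the pattern (or of
   IN_RTX itself when it is not an insn), so immediates buried inside
   memory addresses are counted along with explicit operands.  */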
32640 /* Return total size of immediate operands of an instruction along with number
32641 of corresponding immediate-operands. It initializes its parameters to zero
32642    before calling FIND_CONSTANT.
32643 INSN is the input instruction. IMM is the total of immediates.
32644    IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64 bit immediates.  */
32648 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32650 imm_info imm_values = {0, 0, 0};
32652 find_constant (insn, &imm_values);
32653 *imm = imm_values.imm;
32654 *imm32 = imm_values.imm32;
32655 *imm64 = imm_values.imm64;
32656 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
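/* For example, an insn carrying two 32-bit immediates and one 64-bit
   immediate yields *IMM == 3, *IMM32 == 2, *IMM64 == 1, and a return
   value of 2 * 4 + 1 * 8 == 16 bytes.  */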
32659 /* This function indicates whether an operand of an instruction is an immediate.  */
32663 has_immediate (rtx insn)
32665 int num_imm_operand;
32666 int num_imm32_operand;
32667 int num_imm64_operand;
32670 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32671 &num_imm64_operand);
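/* Since get_num_immediates returns the combined byte size of the
   immediates, a nonzero result doubles as the boolean answer.  */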
32675 /* Return single or double path for instructions. */
32677 static enum insn_path
32678 get_insn_path (rtx insn)
32680 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32682   if ((int) path == 0)
32683     return path_single;
32685   if ((int) path == 1)
32686     return path_double;
32691 /* Return insn dispatch group. */
32693 static enum dispatch_group
32694 get_insn_group (rtx insn)
32696 enum dispatch_group group = get_mem_group (insn);
32700 if (is_branch (insn))
32701 return disp_branch;
32706 if (has_immediate (insn))
32709 if (is_prefetch (insn))
32710 return disp_prefetch;
32712 return disp_no_group;
32715 /* Count number of GROUP restricted instructions in a dispatch
32716 window WINDOW_LIST. */
32719 count_num_restricted (rtx insn, dispatch_windows *window_list)
32721 enum dispatch_group group = get_insn_group (insn);
32723 int num_imm_operand;
32724 int num_imm32_operand;
32725 int num_imm64_operand;
32727 if (group == disp_no_group)
32730 if (group == disp_imm)
32732 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32733 &num_imm64_operand);
32734 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32735 || num_imm_operand + window_list->num_imm > MAX_IMM
32736 || (num_imm32_operand > 0
32737 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32738 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32739 || (num_imm64_operand > 0
32740 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32741 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32742 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32743 && num_imm64_operand > 0
32744 && ((window_list->num_imm_64 > 0
32745 && window_list->num_insn >= 2)
32746 || window_list->num_insn >= 3)))
32752 if ((group == disp_load_store
32753 && (window_list->num_loads >= MAX_LOAD
32754 || window_list->num_stores >= MAX_STORE))
32755 || ((group == disp_load
32756 || group == disp_prefetch)
32757 && window_list->num_loads >= MAX_LOAD)
32758 || (group == disp_store
32759 && window_list->num_stores >= MAX_STORE))
32765 /* This function returns true if insn satisfies dispatch rules on the
32766 last window scheduled. */
32769 fits_dispatch_window (rtx insn)
32771 dispatch_windows *window_list = dispatch_window_list;
32772 dispatch_windows *window_list_next = dispatch_window_list->next;
32773 unsigned int num_restrict;
32774 enum dispatch_group group = get_insn_group (insn);
32775 enum insn_path path = get_insn_path (insn);
32778 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32779 instructions should be given the lowest priority in the
32780      scheduling process in the Haifa scheduler to make sure they will be
32781      scheduled in the same dispatch window as the reference to them.  */
32782 if (group == disp_jcc || group == disp_cmp)
32785 /* Check nonrestricted. */
32786 if (group == disp_no_group || group == disp_branch)
32789 /* Get last dispatch window. */
32790 if (window_list_next)
32791 window_list = window_list_next;
32793 if (window_list->window_num == 1)
32795 sum = window_list->prev->window_size + window_list->window_size;
32798 || (min_insn_size (insn) + sum) >= 48)
32799 /* Window 1 is full. Go for next window. */
32803 num_restrict = count_num_restricted (insn, window_list);
32805 if (num_restrict > num_allowable_groups[group])
32808 /* See if it fits in the first window. */
32809 if (window_list->window_num == 0)
32811       /* The first window should have only single and double path uops.  */
32813 if (path == path_double
32814 && (window_list->num_uops + 2) > MAX_INSN)
32816 else if (path != path_single)
32822 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32823 dispatch window WINDOW_LIST. */
32826 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
32828 int byte_len = min_insn_size (insn);
32829 int num_insn = window_list->num_insn;
32831 sched_insn_info *window = window_list->window;
32832 enum dispatch_group group = get_insn_group (insn);
32833 enum insn_path path = get_insn_path (insn);
32834 int num_imm_operand;
32835 int num_imm32_operand;
32836 int num_imm64_operand;
32838 if (!window_list->violation && group != disp_cmp
32839 && !fits_dispatch_window (insn))
32840 window_list->violation = true;
32842 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32843 &num_imm64_operand);
32845 /* Initialize window with new instruction. */
32846 window[num_insn].insn = insn;
32847 window[num_insn].byte_len = byte_len;
32848 window[num_insn].group = group;
32849 window[num_insn].path = path;
32850 window[num_insn].imm_bytes = imm_size;
32852 window_list->window_size += byte_len;
32853 window_list->num_insn = num_insn + 1;
32854 window_list->num_uops = window_list->num_uops + num_uops;
32855 window_list->imm_size += imm_size;
32856 window_list->num_imm += num_imm_operand;
32857 window_list->num_imm_32 += num_imm32_operand;
32858 window_list->num_imm_64 += num_imm64_operand;
32860 if (group == disp_store)
32861 window_list->num_stores += 1;
32862 else if (group == disp_load
32863 || group == disp_prefetch)
32864 window_list->num_loads += 1;
32865 else if (group == disp_load_store)
32867 window_list->num_stores += 1;
32868 window_list->num_loads += 1;
32872 /* Adds a scheduled instruction, INSN, to the current dispatch window.
32873 If the total bytes of instructions or the number of instructions in
32874    the window exceed the allowable limits, it allocates a new window.  */
32877 add_to_dispatch_window (rtx insn)
32880 dispatch_windows *window_list;
32881 dispatch_windows *next_list;
32882 dispatch_windows *window0_list;
32883 enum insn_path path;
32884 enum dispatch_group insn_group;
32892 if (INSN_CODE (insn) < 0)
32895 byte_len = min_insn_size (insn);
32896 window_list = dispatch_window_list;
32897 next_list = window_list->next;
32898 path = get_insn_path (insn);
32899 insn_group = get_insn_group (insn);
32901 /* Get the last dispatch window. */
32903 window_list = dispatch_window_list->next;
32905 if (path == path_single)
32907 else if (path == path_double)
32910 insn_num_uops = (int) path;
32912 /* If current window is full, get a new window.
32913 Window number zero is full, if MAX_INSN uops are scheduled in it.
32914 Window number one is full, if window zero's bytes plus window
32915 one's bytes is 32, or if the bytes of the new instruction added
32916 to the total makes it greater than 48, or it has already MAX_INSN
32917 instructions in it. */
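  /* For example, once window 0 and window 1 together hold 32 bytes, or
     adding the new instruction would push their combined size to 48 bytes
     or more, window 1 is closed and scheduling restarts with a fresh
     window 0 (see the window_num == 1 case below).  */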
32918 num_insn = window_list->num_insn;
32919 num_uops = window_list->num_uops;
32920 window_num = window_list->window_num;
32921 insn_fits = fits_dispatch_window (insn);
32923 if (num_insn >= MAX_INSN
32924 || num_uops + insn_num_uops > MAX_INSN
32927 window_num = ~window_num & 1;
32928 window_list = allocate_next_window (window_num);
32931 if (window_num == 0)
32933 add_insn_window (insn, window_list, insn_num_uops);
32934 if (window_list->num_insn >= MAX_INSN
32935 && insn_group == disp_branch)
32937 process_end_window ();
32941 else if (window_num == 1)
32943 window0_list = window_list->prev;
32944 sum = window0_list->window_size + window_list->window_size;
32946 || (byte_len + sum) >= 48)
32948 process_end_window ();
32949 window_list = dispatch_window_list;
32952 add_insn_window (insn, window_list, insn_num_uops);
32955 gcc_unreachable ();
32957 if (is_end_basic_block (insn_group))
32959     /* End of basic block is reached; do end-of-basic-block processing.  */
32960 process_end_window ();
32965 /* Print the dispatch window, WINDOW_NUM, to FILE. */
32967 DEBUG_FUNCTION static void
32968 debug_dispatch_window_file (FILE *file, int window_num)
32970 dispatch_windows *list;
32973 if (window_num == 0)
32974 list = dispatch_window_list;
32976 list = dispatch_window_list1;
32978 fprintf (file, "Window #%d:\n", list->window_num);
32979 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
32980 list->num_insn, list->num_uops, list->window_size);
32981 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32982 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
32984 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
32986 fprintf (file, " insn info:\n");
32988 for (i = 0; i < MAX_INSN; i++)
32990 if (!list->window[i].insn)
32992 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
32993 i, group_name[list->window[i].group],
32994 i, (void *)list->window[i].insn,
32995 i, list->window[i].path,
32996 i, list->window[i].byte_len,
32997 i, list->window[i].imm_bytes);
33001 /* Print a dispatch window to stdout.  */
33003 DEBUG_FUNCTION void
33004 debug_dispatch_window (int window_num)
33006 debug_dispatch_window_file (stdout, window_num);
33009 /* Print INSN dispatch information to FILE. */
33011 DEBUG_FUNCTION static void
33012 debug_insn_dispatch_info_file (FILE *file, rtx insn)
33015 enum insn_path path;
33016 enum dispatch_group group;
33018 int num_imm_operand;
33019 int num_imm32_operand;
33020 int num_imm64_operand;
33022 if (INSN_CODE (insn) < 0)
33025 byte_len = min_insn_size (insn);
33026 path = get_insn_path (insn);
33027 group = get_insn_group (insn);
33028 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33029 &num_imm64_operand);
33031 fprintf (file, " insn info:\n");
33032 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
33033 group_name[group], path, byte_len);
33034 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
33035 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
33038 /* Print to stdout the status of the ready list with respect to
33039 dispatch windows. */
33041 DEBUG_FUNCTION void
33042 debug_ready_dispatch (void)
33045 int no_ready = number_in_ready ();
33047 fprintf (stdout, "Number of ready: %d\n", no_ready);
33049 for (i = 0; i < no_ready; i++)
33050 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
33053 /* This routine is the driver of the dispatch scheduler. */
33056 do_dispatch (rtx insn, int mode)
33058 if (mode == DISPATCH_INIT)
33059 init_dispatch_sched ();
33060 else if (mode == ADD_TO_DISPATCH_WINDOW)
33061 add_to_dispatch_window (insn);
33064 /* Return TRUE if Dispatch Scheduling is supported. */
33067 has_dispatch (rtx insn, int action)
33069 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
33075 case IS_DISPATCH_ON:
33080 return is_cmp (insn);
33082 case DISPATCH_VIOLATION:
33083 return dispatch_violation ();
33085 case FITS_DISPATCH_WINDOW:
33086 return fits_dispatch_window (insn);
33092 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
33093 place emms and femms instructions. */
33095 static enum machine_mode
33096 ix86_preferred_simd_mode (enum machine_mode mode)
33098 /* Disable double precision vectorizer if needed. */
33099 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
33102 if (!TARGET_AVX && !TARGET_SSE)
33108 return TARGET_AVX ? V8SFmode : V4SFmode;
33110 return TARGET_AVX ? V4DFmode : V2DFmode;
33126 /* If AVX is enabled then try vectorizing with both 256bit and 128bit vectors.  */
33129 static unsigned int
33130 ix86_autovectorize_vector_sizes (void)
33132 return TARGET_AVX ? 32 | 16 : 0;
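/* The result is a bitmask of vector sizes in bytes: 32 | 16 advertises
   both the 32-byte (256-bit) AVX vectors and the 16-byte (128-bit) SSE
   vectors, while 0 tells the vectorizer to use only the preferred SIMD
   mode returned above.  */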
33135 /* Initialize the GCC target structure. */
33136 #undef TARGET_RETURN_IN_MEMORY
33137 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
33139 #undef TARGET_LEGITIMIZE_ADDRESS
33140 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
33142 #undef TARGET_ATTRIBUTE_TABLE
33143 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
33144 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33145 # undef TARGET_MERGE_DECL_ATTRIBUTES
33146 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
33149 #undef TARGET_COMP_TYPE_ATTRIBUTES
33150 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
33152 #undef TARGET_INIT_BUILTINS
33153 #define TARGET_INIT_BUILTINS ix86_init_builtins
33154 #undef TARGET_BUILTIN_DECL
33155 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33156 #undef TARGET_EXPAND_BUILTIN
33157 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33159 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33160 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33161 ix86_builtin_vectorized_function
33163 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33164 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33166 #undef TARGET_BUILTIN_RECIPROCAL
33167 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33169 #undef TARGET_ASM_FUNCTION_EPILOGUE
33170 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33172 #undef TARGET_ENCODE_SECTION_INFO
33173 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33174 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33176 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
33179 #undef TARGET_ASM_OPEN_PAREN
33180 #define TARGET_ASM_OPEN_PAREN ""
33181 #undef TARGET_ASM_CLOSE_PAREN
33182 #define TARGET_ASM_CLOSE_PAREN ""
33184 #undef TARGET_ASM_BYTE_OP
33185 #define TARGET_ASM_BYTE_OP ASM_BYTE
33187 #undef TARGET_ASM_ALIGNED_HI_OP
33188 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33189 #undef TARGET_ASM_ALIGNED_SI_OP
33190 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
33192 #undef TARGET_ASM_ALIGNED_DI_OP
33193 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
33196 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33197 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33199 #undef TARGET_ASM_UNALIGNED_HI_OP
33200 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33201 #undef TARGET_ASM_UNALIGNED_SI_OP
33202 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33203 #undef TARGET_ASM_UNALIGNED_DI_OP
33204 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33206 #undef TARGET_PRINT_OPERAND
33207 #define TARGET_PRINT_OPERAND ix86_print_operand
33208 #undef TARGET_PRINT_OPERAND_ADDRESS
33209 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33210 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33211 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33212 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33213 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33215 #undef TARGET_SCHED_ADJUST_COST
33216 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33217 #undef TARGET_SCHED_ISSUE_RATE
33218 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33219 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33220 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33221 ia32_multipass_dfa_lookahead
33223 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33224 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
33227 #undef TARGET_HAVE_TLS
33228 #define TARGET_HAVE_TLS true
33230 #undef TARGET_CANNOT_FORCE_CONST_MEM
33231 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33232 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33233 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33235 #undef TARGET_DELEGITIMIZE_ADDRESS
33236 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33238 #undef TARGET_MS_BITFIELD_LAYOUT_P
33239 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
33242 #undef TARGET_BINDS_LOCAL_P
33243 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
33245 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33246 #undef TARGET_BINDS_LOCAL_P
33247 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
33250 #undef TARGET_ASM_OUTPUT_MI_THUNK
33251 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33252 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33253 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33255 #undef TARGET_ASM_FILE_START
33256 #define TARGET_ASM_FILE_START x86_file_start
33258 #undef TARGET_DEFAULT_TARGET_FLAGS
33259 #define TARGET_DEFAULT_TARGET_FLAGS \
33261 | TARGET_SUBTARGET_DEFAULT \
33262 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
33265 #undef TARGET_HANDLE_OPTION
33266 #define TARGET_HANDLE_OPTION ix86_handle_option
33268 #undef TARGET_OPTION_OVERRIDE
33269 #define TARGET_OPTION_OVERRIDE ix86_option_override
33270 #undef TARGET_OPTION_OPTIMIZATION
33271 #define TARGET_OPTION_OPTIMIZATION ix86_option_optimization
33273 #undef TARGET_REGISTER_MOVE_COST
33274 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33275 #undef TARGET_MEMORY_MOVE_COST
33276 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33277 #undef TARGET_RTX_COSTS
33278 #define TARGET_RTX_COSTS ix86_rtx_costs
33279 #undef TARGET_ADDRESS_COST
33280 #define TARGET_ADDRESS_COST ix86_address_cost
33282 #undef TARGET_FIXED_CONDITION_CODE_REGS
33283 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33284 #undef TARGET_CC_MODES_COMPATIBLE
33285 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33287 #undef TARGET_MACHINE_DEPENDENT_REORG
33288 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33290 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33291 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33293 #undef TARGET_BUILD_BUILTIN_VA_LIST
33294 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33296 #undef TARGET_ENUM_VA_LIST_P
33297 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33299 #undef TARGET_FN_ABI_VA_LIST
33300 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33302 #undef TARGET_CANONICAL_VA_LIST_TYPE
33303 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33305 #undef TARGET_EXPAND_BUILTIN_VA_START
33306 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33308 #undef TARGET_MD_ASM_CLOBBERS
33309 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33311 #undef TARGET_PROMOTE_PROTOTYPES
33312 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33313 #undef TARGET_STRUCT_VALUE_RTX
33314 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33315 #undef TARGET_SETUP_INCOMING_VARARGS
33316 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33317 #undef TARGET_MUST_PASS_IN_STACK
33318 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33319 #undef TARGET_FUNCTION_ARG_ADVANCE
33320 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33321 #undef TARGET_FUNCTION_ARG
33322 #define TARGET_FUNCTION_ARG ix86_function_arg
33323 #undef TARGET_PASS_BY_REFERENCE
33324 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33325 #undef TARGET_INTERNAL_ARG_POINTER
33326 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33327 #undef TARGET_UPDATE_STACK_BOUNDARY
33328 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33329 #undef TARGET_GET_DRAP_RTX
33330 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33331 #undef TARGET_STRICT_ARGUMENT_NAMING
33332 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33333 #undef TARGET_STATIC_CHAIN
33334 #define TARGET_STATIC_CHAIN ix86_static_chain
33335 #undef TARGET_TRAMPOLINE_INIT
33336 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33337 #undef TARGET_RETURN_POPS_ARGS
33338 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33349 #undef TARGET_C_MODE_FOR_SUFFIX
33350 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
33353 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
33354 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
33357 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33358 #undef TARGET_INSERT_ATTRIBUTES
33359 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
33362 #undef TARGET_MANGLE_TYPE
33363 #define TARGET_MANGLE_TYPE ix86_mangle_type
33365 #undef TARGET_STACK_PROTECT_FAIL
33366 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33368 #undef TARGET_SUPPORTS_SPLIT_STACK
33369 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33371 #undef TARGET_FUNCTION_VALUE
33372 #define TARGET_FUNCTION_VALUE ix86_function_value
33374 #undef TARGET_FUNCTION_VALUE_REGNO_P
33375 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33377 #undef TARGET_SECONDARY_RELOAD
33378 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33380 #undef TARGET_PREFERRED_RELOAD_CLASS
33381 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
33382 #undef TARGET_CLASS_LIKELY_SPILLED_P
33383 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33385 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33386 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33387 ix86_builtin_vectorization_cost
33388 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33389 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33390 ix86_vectorize_builtin_vec_perm
33391 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33392 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33393 ix86_vectorize_builtin_vec_perm_ok
33394 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
33395 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
33396 ix86_preferred_simd_mode
33397 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
33398 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
33399 ix86_autovectorize_vector_sizes
33401 #undef TARGET_SET_CURRENT_FUNCTION
33402 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33404 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33405 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33407 #undef TARGET_OPTION_SAVE
33408 #define TARGET_OPTION_SAVE ix86_function_specific_save
33410 #undef TARGET_OPTION_RESTORE
33411 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33413 #undef TARGET_OPTION_PRINT
33414 #define TARGET_OPTION_PRINT ix86_function_specific_print
33416 #undef TARGET_CAN_INLINE_P
33417 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33419 #undef TARGET_EXPAND_TO_RTL_HOOK
33420 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33422 #undef TARGET_LEGITIMATE_ADDRESS_P
33423 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33425 #undef TARGET_IRA_COVER_CLASSES
33426 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33428 #undef TARGET_FRAME_POINTER_REQUIRED
33429 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33431 #undef TARGET_CAN_ELIMINATE
33432 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33434 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33435 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33437 #undef TARGET_ASM_CODE_END
33438 #define TARGET_ASM_CODE_END ix86_code_end
33440 struct gcc_target targetm = TARGET_INITIALIZER;
33442 #include "gt-i386.h"