1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2013 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "insn-codes.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
50 #include "langhooks.h"
55 #include "tree-pass.h"
59 struct processor_costs {
63 /* Integer signed load */
66 /* Integer zeroed load */
72 /* fmov, fneg, fabs */
76 const int float_plusminus;
82 const int float_cmove;
88 const int float_div_sf;
91 const int float_div_df;
94 const int float_sqrt_sf;
97 const int float_sqrt_df;
105 /* integer multiply cost for each bit set past the most
106 significant 3, so the formula for multiply cost becomes:
109 highest_bit = highest_clear_bit(rs1);
111 highest_bit = highest_set_bit(rs1);
114 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
116 A value of zero indicates that the multiply costs is fixed,
118 const int int_mul_bit_factor;
129 /* penalty for shifts, due to scheduling rules etc. */
130 const int shift_penalty;
134 struct processor_costs cypress_costs = {
135 COSTS_N_INSNS (2), /* int load */
136 COSTS_N_INSNS (2), /* int signed load */
137 COSTS_N_INSNS (2), /* int zeroed load */
138 COSTS_N_INSNS (2), /* float load */
139 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
140 COSTS_N_INSNS (5), /* fadd, fsub */
141 COSTS_N_INSNS (1), /* fcmp */
142 COSTS_N_INSNS (1), /* fmov, fmovr */
143 COSTS_N_INSNS (7), /* fmul */
144 COSTS_N_INSNS (37), /* fdivs */
145 COSTS_N_INSNS (37), /* fdivd */
146 COSTS_N_INSNS (63), /* fsqrts */
147 COSTS_N_INSNS (63), /* fsqrtd */
148 COSTS_N_INSNS (1), /* imul */
149 COSTS_N_INSNS (1), /* imulX */
150 0, /* imul bit factor */
151 COSTS_N_INSNS (1), /* idiv */
152 COSTS_N_INSNS (1), /* idivX */
153 COSTS_N_INSNS (1), /* movcc/movr */
154 0, /* shift penalty */
158 struct processor_costs supersparc_costs = {
159 COSTS_N_INSNS (1), /* int load */
160 COSTS_N_INSNS (1), /* int signed load */
161 COSTS_N_INSNS (1), /* int zeroed load */
162 COSTS_N_INSNS (0), /* float load */
163 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
164 COSTS_N_INSNS (3), /* fadd, fsub */
165 COSTS_N_INSNS (3), /* fcmp */
166 COSTS_N_INSNS (1), /* fmov, fmovr */
167 COSTS_N_INSNS (3), /* fmul */
168 COSTS_N_INSNS (6), /* fdivs */
169 COSTS_N_INSNS (9), /* fdivd */
170 COSTS_N_INSNS (12), /* fsqrts */
171 COSTS_N_INSNS (12), /* fsqrtd */
172 COSTS_N_INSNS (4), /* imul */
173 COSTS_N_INSNS (4), /* imulX */
174 0, /* imul bit factor */
175 COSTS_N_INSNS (4), /* idiv */
176 COSTS_N_INSNS (4), /* idivX */
177 COSTS_N_INSNS (1), /* movcc/movr */
178 1, /* shift penalty */
182 struct processor_costs hypersparc_costs = {
183 COSTS_N_INSNS (1), /* int load */
184 COSTS_N_INSNS (1), /* int signed load */
185 COSTS_N_INSNS (1), /* int zeroed load */
186 COSTS_N_INSNS (1), /* float load */
187 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
188 COSTS_N_INSNS (1), /* fadd, fsub */
189 COSTS_N_INSNS (1), /* fcmp */
190 COSTS_N_INSNS (1), /* fmov, fmovr */
191 COSTS_N_INSNS (1), /* fmul */
192 COSTS_N_INSNS (8), /* fdivs */
193 COSTS_N_INSNS (12), /* fdivd */
194 COSTS_N_INSNS (17), /* fsqrts */
195 COSTS_N_INSNS (17), /* fsqrtd */
196 COSTS_N_INSNS (17), /* imul */
197 COSTS_N_INSNS (17), /* imulX */
198 0, /* imul bit factor */
199 COSTS_N_INSNS (17), /* idiv */
200 COSTS_N_INSNS (17), /* idivX */
201 COSTS_N_INSNS (1), /* movcc/movr */
202 0, /* shift penalty */
206 struct processor_costs leon_costs = {
207 COSTS_N_INSNS (1), /* int load */
208 COSTS_N_INSNS (1), /* int signed load */
209 COSTS_N_INSNS (1), /* int zeroed load */
210 COSTS_N_INSNS (1), /* float load */
211 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
212 COSTS_N_INSNS (1), /* fadd, fsub */
213 COSTS_N_INSNS (1), /* fcmp */
214 COSTS_N_INSNS (1), /* fmov, fmovr */
215 COSTS_N_INSNS (1), /* fmul */
216 COSTS_N_INSNS (15), /* fdivs */
217 COSTS_N_INSNS (15), /* fdivd */
218 COSTS_N_INSNS (23), /* fsqrts */
219 COSTS_N_INSNS (23), /* fsqrtd */
220 COSTS_N_INSNS (5), /* imul */
221 COSTS_N_INSNS (5), /* imulX */
222 0, /* imul bit factor */
223 COSTS_N_INSNS (5), /* idiv */
224 COSTS_N_INSNS (5), /* idivX */
225 COSTS_N_INSNS (1), /* movcc/movr */
226 0, /* shift penalty */
230 struct processor_costs sparclet_costs = {
231 COSTS_N_INSNS (3), /* int load */
232 COSTS_N_INSNS (3), /* int signed load */
233 COSTS_N_INSNS (1), /* int zeroed load */
234 COSTS_N_INSNS (1), /* float load */
235 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
236 COSTS_N_INSNS (1), /* fadd, fsub */
237 COSTS_N_INSNS (1), /* fcmp */
238 COSTS_N_INSNS (1), /* fmov, fmovr */
239 COSTS_N_INSNS (1), /* fmul */
240 COSTS_N_INSNS (1), /* fdivs */
241 COSTS_N_INSNS (1), /* fdivd */
242 COSTS_N_INSNS (1), /* fsqrts */
243 COSTS_N_INSNS (1), /* fsqrtd */
244 COSTS_N_INSNS (5), /* imul */
245 COSTS_N_INSNS (5), /* imulX */
246 0, /* imul bit factor */
247 COSTS_N_INSNS (5), /* idiv */
248 COSTS_N_INSNS (5), /* idivX */
249 COSTS_N_INSNS (1), /* movcc/movr */
250 0, /* shift penalty */
254 struct processor_costs ultrasparc_costs = {
255 COSTS_N_INSNS (2), /* int load */
256 COSTS_N_INSNS (3), /* int signed load */
257 COSTS_N_INSNS (2), /* int zeroed load */
258 COSTS_N_INSNS (2), /* float load */
259 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
260 COSTS_N_INSNS (4), /* fadd, fsub */
261 COSTS_N_INSNS (1), /* fcmp */
262 COSTS_N_INSNS (2), /* fmov, fmovr */
263 COSTS_N_INSNS (4), /* fmul */
264 COSTS_N_INSNS (13), /* fdivs */
265 COSTS_N_INSNS (23), /* fdivd */
266 COSTS_N_INSNS (13), /* fsqrts */
267 COSTS_N_INSNS (23), /* fsqrtd */
268 COSTS_N_INSNS (4), /* imul */
269 COSTS_N_INSNS (4), /* imulX */
270 2, /* imul bit factor */
271 COSTS_N_INSNS (37), /* idiv */
272 COSTS_N_INSNS (68), /* idivX */
273 COSTS_N_INSNS (2), /* movcc/movr */
274 2, /* shift penalty */
278 struct processor_costs ultrasparc3_costs = {
279 COSTS_N_INSNS (2), /* int load */
280 COSTS_N_INSNS (3), /* int signed load */
281 COSTS_N_INSNS (3), /* int zeroed load */
282 COSTS_N_INSNS (2), /* float load */
283 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
284 COSTS_N_INSNS (4), /* fadd, fsub */
285 COSTS_N_INSNS (5), /* fcmp */
286 COSTS_N_INSNS (3), /* fmov, fmovr */
287 COSTS_N_INSNS (4), /* fmul */
288 COSTS_N_INSNS (17), /* fdivs */
289 COSTS_N_INSNS (20), /* fdivd */
290 COSTS_N_INSNS (20), /* fsqrts */
291 COSTS_N_INSNS (29), /* fsqrtd */
292 COSTS_N_INSNS (6), /* imul */
293 COSTS_N_INSNS (6), /* imulX */
294 0, /* imul bit factor */
295 COSTS_N_INSNS (40), /* idiv */
296 COSTS_N_INSNS (71), /* idivX */
297 COSTS_N_INSNS (2), /* movcc/movr */
298 0, /* shift penalty */
302 struct processor_costs niagara_costs = {
303 COSTS_N_INSNS (3), /* int load */
304 COSTS_N_INSNS (3), /* int signed load */
305 COSTS_N_INSNS (3), /* int zeroed load */
306 COSTS_N_INSNS (9), /* float load */
307 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
308 COSTS_N_INSNS (8), /* fadd, fsub */
309 COSTS_N_INSNS (26), /* fcmp */
310 COSTS_N_INSNS (8), /* fmov, fmovr */
311 COSTS_N_INSNS (29), /* fmul */
312 COSTS_N_INSNS (54), /* fdivs */
313 COSTS_N_INSNS (83), /* fdivd */
314 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
315 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
316 COSTS_N_INSNS (11), /* imul */
317 COSTS_N_INSNS (11), /* imulX */
318 0, /* imul bit factor */
319 COSTS_N_INSNS (72), /* idiv */
320 COSTS_N_INSNS (72), /* idivX */
321 COSTS_N_INSNS (1), /* movcc/movr */
322 0, /* shift penalty */
326 struct processor_costs niagara2_costs = {
327 COSTS_N_INSNS (3), /* int load */
328 COSTS_N_INSNS (3), /* int signed load */
329 COSTS_N_INSNS (3), /* int zeroed load */
330 COSTS_N_INSNS (3), /* float load */
331 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
332 COSTS_N_INSNS (6), /* fadd, fsub */
333 COSTS_N_INSNS (6), /* fcmp */
334 COSTS_N_INSNS (6), /* fmov, fmovr */
335 COSTS_N_INSNS (6), /* fmul */
336 COSTS_N_INSNS (19), /* fdivs */
337 COSTS_N_INSNS (33), /* fdivd */
338 COSTS_N_INSNS (19), /* fsqrts */
339 COSTS_N_INSNS (33), /* fsqrtd */
340 COSTS_N_INSNS (5), /* imul */
341 COSTS_N_INSNS (5), /* imulX */
342 0, /* imul bit factor */
343 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
344 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
345 COSTS_N_INSNS (1), /* movcc/movr */
346 0, /* shift penalty */
350 struct processor_costs niagara3_costs = {
351 COSTS_N_INSNS (3), /* int load */
352 COSTS_N_INSNS (3), /* int signed load */
353 COSTS_N_INSNS (3), /* int zeroed load */
354 COSTS_N_INSNS (3), /* float load */
355 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
356 COSTS_N_INSNS (9), /* fadd, fsub */
357 COSTS_N_INSNS (9), /* fcmp */
358 COSTS_N_INSNS (9), /* fmov, fmovr */
359 COSTS_N_INSNS (9), /* fmul */
360 COSTS_N_INSNS (23), /* fdivs */
361 COSTS_N_INSNS (37), /* fdivd */
362 COSTS_N_INSNS (23), /* fsqrts */
363 COSTS_N_INSNS (37), /* fsqrtd */
364 COSTS_N_INSNS (9), /* imul */
365 COSTS_N_INSNS (9), /* imulX */
366 0, /* imul bit factor */
367 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
368 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
369 COSTS_N_INSNS (1), /* movcc/movr */
370 0, /* shift penalty */
374 struct processor_costs niagara4_costs = {
375 COSTS_N_INSNS (5), /* int load */
376 COSTS_N_INSNS (5), /* int signed load */
377 COSTS_N_INSNS (5), /* int zeroed load */
378 COSTS_N_INSNS (5), /* float load */
379 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
380 COSTS_N_INSNS (11), /* fadd, fsub */
381 COSTS_N_INSNS (11), /* fcmp */
382 COSTS_N_INSNS (11), /* fmov, fmovr */
383 COSTS_N_INSNS (11), /* fmul */
384 COSTS_N_INSNS (24), /* fdivs */
385 COSTS_N_INSNS (37), /* fdivd */
386 COSTS_N_INSNS (24), /* fsqrts */
387 COSTS_N_INSNS (37), /* fsqrtd */
388 COSTS_N_INSNS (12), /* imul */
389 COSTS_N_INSNS (12), /* imulX */
390 0, /* imul bit factor */
391 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
392 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
393 COSTS_N_INSNS (1), /* movcc/movr */
394 0, /* shift penalty */
397 static const struct processor_costs *sparc_costs = &cypress_costs;
399 #ifdef HAVE_AS_RELAX_OPTION
400 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
401 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
402 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
403 somebody does not branch between the sethi and jmp. */
404 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
406 #define LEAF_SIBCALL_SLOT_RESERVED_P \
407 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
410 /* Vector to say how input registers are mapped to output registers.
411 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
412 eliminate it. You must use -fomit-frame-pointer to get that. */
413 char leaf_reg_remap[] =
414 { 0, 1, 2, 3, 4, 5, 6, 7,
415 -1, -1, -1, -1, -1, -1, 14, -1,
416 -1, -1, -1, -1, -1, -1, -1, -1,
417 8, 9, 10, 11, 12, 13, -1, 15,
419 32, 33, 34, 35, 36, 37, 38, 39,
420 40, 41, 42, 43, 44, 45, 46, 47,
421 48, 49, 50, 51, 52, 53, 54, 55,
422 56, 57, 58, 59, 60, 61, 62, 63,
423 64, 65, 66, 67, 68, 69, 70, 71,
424 72, 73, 74, 75, 76, 77, 78, 79,
425 80, 81, 82, 83, 84, 85, 86, 87,
426 88, 89, 90, 91, 92, 93, 94, 95,
427 96, 97, 98, 99, 100, 101, 102};
429 /* Vector, indexed by hard register number, which contains 1
430 for a register that is allowable in a candidate for leaf
431 function treatment. */
432 char sparc_leaf_regs[] =
433 { 1, 1, 1, 1, 1, 1, 1, 1,
434 0, 0, 0, 0, 0, 0, 1, 0,
435 0, 0, 0, 0, 0, 0, 0, 0,
436 1, 1, 1, 1, 1, 1, 0, 1,
437 1, 1, 1, 1, 1, 1, 1, 1,
438 1, 1, 1, 1, 1, 1, 1, 1,
439 1, 1, 1, 1, 1, 1, 1, 1,
440 1, 1, 1, 1, 1, 1, 1, 1,
441 1, 1, 1, 1, 1, 1, 1, 1,
442 1, 1, 1, 1, 1, 1, 1, 1,
443 1, 1, 1, 1, 1, 1, 1, 1,
444 1, 1, 1, 1, 1, 1, 1, 1,
445 1, 1, 1, 1, 1, 1, 1};
447 struct GTY(()) machine_function
449 /* Size of the frame of the function. */
450 HOST_WIDE_INT frame_size;
452 /* Size of the frame of the function minus the register window save area
453 and the outgoing argument area. */
454 HOST_WIDE_INT apparent_frame_size;
456 /* Register we pretend the frame pointer is allocated to. Normally, this
457 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
458 record "offset" separately as it may be too big for (reg + disp). */
460 HOST_WIDE_INT frame_base_offset;
462 /* Some local-dynamic TLS symbol name. */
463 const char *some_ld_name;
465 /* Number of global or FP registers to be saved (as 4-byte quantities). */
466 int n_global_fp_regs;
468 /* True if the current function is leaf and uses only leaf regs,
469 so that the SPARC leaf function optimization can be applied.
470 Private version of crtl->uses_only_leaf_regs, see
471 sparc_expand_prologue for the rationale. */
474 /* True if the prologue saves local or in registers. */
475 bool save_local_in_regs_p;
477 /* True if the data calculated by sparc_expand_prologue are valid. */
478 bool prologue_data_valid_p;
481 #define sparc_frame_size cfun->machine->frame_size
482 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
483 #define sparc_frame_base_reg cfun->machine->frame_base_reg
484 #define sparc_frame_base_offset cfun->machine->frame_base_offset
485 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
486 #define sparc_leaf_function_p cfun->machine->leaf_function_p
487 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
488 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
490 /* 1 if the next opcode is to be specially indented. */
491 int sparc_indent_opcode = 0;
493 static void sparc_option_override (void);
494 static void sparc_init_modes (void);
495 static void scan_record_type (const_tree, int *, int *, int *);
496 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
497 const_tree, bool, bool, int *, int *);
499 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
500 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
502 static void sparc_emit_set_const32 (rtx, rtx);
503 static void sparc_emit_set_const64 (rtx, rtx);
504 static void sparc_output_addr_vec (rtx);
505 static void sparc_output_addr_diff_vec (rtx);
506 static void sparc_output_deferred_case_vectors (void);
507 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
508 static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
509 static rtx sparc_builtin_saveregs (void);
510 static int epilogue_renumber (rtx *, int);
511 static bool sparc_assemble_integer (rtx, unsigned int, int);
512 static int set_extends (rtx);
513 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
514 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
515 #ifdef TARGET_SOLARIS
516 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
517 tree) ATTRIBUTE_UNUSED;
519 static int sparc_adjust_cost (rtx, rtx, rtx, int);
520 static int sparc_issue_rate (void);
521 static void sparc_sched_init (FILE *, int, int);
522 static int sparc_use_sched_lookahead (void);
524 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
525 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
526 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
527 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
528 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
530 static bool sparc_function_ok_for_sibcall (tree, tree);
531 static void sparc_init_libfuncs (void);
532 static void sparc_init_builtins (void);
533 static void sparc_vis_init_builtins (void);
534 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
535 static tree sparc_fold_builtin (tree, int, tree *, bool);
536 static int sparc_vis_mul8x16 (int, int);
537 static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
538 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
539 HOST_WIDE_INT, tree);
540 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
541 HOST_WIDE_INT, const_tree);
542 static struct machine_function * sparc_init_machine_status (void);
543 static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
544 static rtx sparc_tls_get_addr (void);
545 static rtx sparc_tls_got (void);
546 static const char *get_some_local_dynamic_name (void);
547 static int get_some_local_dynamic_name_1 (rtx *, void *);
548 static int sparc_register_move_cost (enum machine_mode,
549 reg_class_t, reg_class_t);
550 static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
551 static rtx sparc_function_value (const_tree, const_tree, bool);
552 static rtx sparc_libcall_value (enum machine_mode, const_rtx);
553 static bool sparc_function_value_regno_p (const unsigned int);
554 static rtx sparc_struct_value_rtx (tree, int);
555 static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
556 int *, const_tree, int);
557 static bool sparc_return_in_memory (const_tree, const_tree);
558 static bool sparc_strict_argument_naming (cumulative_args_t);
559 static void sparc_va_start (tree, rtx);
560 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
561 static bool sparc_vector_mode_supported_p (enum machine_mode);
562 static bool sparc_tls_referenced_p (rtx);
563 static rtx sparc_legitimize_tls_address (rtx);
564 static rtx sparc_legitimize_pic_address (rtx, rtx);
565 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
566 static rtx sparc_delegitimize_address (rtx);
567 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
568 static bool sparc_pass_by_reference (cumulative_args_t,
569 enum machine_mode, const_tree, bool);
570 static void sparc_function_arg_advance (cumulative_args_t,
571 enum machine_mode, const_tree, bool);
572 static rtx sparc_function_arg_1 (cumulative_args_t,
573 enum machine_mode, const_tree, bool, bool);
574 static rtx sparc_function_arg (cumulative_args_t,
575 enum machine_mode, const_tree, bool);
576 static rtx sparc_function_incoming_arg (cumulative_args_t,
577 enum machine_mode, const_tree, bool);
578 static unsigned int sparc_function_arg_boundary (enum machine_mode,
580 static int sparc_arg_partial_bytes (cumulative_args_t,
581 enum machine_mode, tree, bool);
582 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
583 static void sparc_file_end (void);
584 static bool sparc_frame_pointer_required (void);
585 static bool sparc_can_eliminate (const int, const int);
586 static rtx sparc_builtin_setjmp_frame_value (void);
587 static void sparc_conditional_register_usage (void);
588 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
589 static const char *sparc_mangle_type (const_tree);
591 static void sparc_trampoline_init (rtx, tree, rtx);
592 static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
593 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
594 static bool sparc_print_operand_punct_valid_p (unsigned char);
595 static void sparc_print_operand (FILE *, rtx, int);
596 static void sparc_print_operand_address (FILE *, rtx);
597 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
599 secondary_reload_info *);
600 static enum machine_mode sparc_cstore_mode (enum insn_code icode);
602 #ifdef SUBTARGET_ATTRIBUTE_TABLE
603 /* Table of valid machine attributes. */
604 static const struct attribute_spec sparc_attribute_table[] =
606 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
608 SUBTARGET_ATTRIBUTE_TABLE,
609 { NULL, 0, 0, false, false, false, NULL, false }
613 /* Option handling. */
616 enum cmodel sparc_cmodel;
618 char sparc_hard_reg_printed[8];
620 /* Initialize the GCC target structure. */
622 /* The default is to use .half rather than .short for aligned HI objects. */
623 #undef TARGET_ASM_ALIGNED_HI_OP
624 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
626 #undef TARGET_ASM_UNALIGNED_HI_OP
627 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
628 #undef TARGET_ASM_UNALIGNED_SI_OP
629 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
630 #undef TARGET_ASM_UNALIGNED_DI_OP
631 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
633 /* The target hook has to handle DI-mode values. */
634 #undef TARGET_ASM_INTEGER
635 #define TARGET_ASM_INTEGER sparc_assemble_integer
637 #undef TARGET_ASM_FUNCTION_PROLOGUE
638 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
639 #undef TARGET_ASM_FUNCTION_EPILOGUE
640 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
642 #undef TARGET_SCHED_ADJUST_COST
643 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
644 #undef TARGET_SCHED_ISSUE_RATE
645 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
646 #undef TARGET_SCHED_INIT
647 #define TARGET_SCHED_INIT sparc_sched_init
648 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
649 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
651 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
652 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
654 #undef TARGET_INIT_LIBFUNCS
655 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
656 #undef TARGET_INIT_BUILTINS
657 #define TARGET_INIT_BUILTINS sparc_init_builtins
659 #undef TARGET_LEGITIMIZE_ADDRESS
660 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
661 #undef TARGET_DELEGITIMIZE_ADDRESS
662 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
663 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
664 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
666 #undef TARGET_EXPAND_BUILTIN
667 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
668 #undef TARGET_FOLD_BUILTIN
669 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
672 #undef TARGET_HAVE_TLS
673 #define TARGET_HAVE_TLS true
676 #undef TARGET_CANNOT_FORCE_CONST_MEM
677 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
679 #undef TARGET_ASM_OUTPUT_MI_THUNK
680 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
681 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
682 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
684 #undef TARGET_RTX_COSTS
685 #define TARGET_RTX_COSTS sparc_rtx_costs
686 #undef TARGET_ADDRESS_COST
687 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
688 #undef TARGET_REGISTER_MOVE_COST
689 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
691 #undef TARGET_PROMOTE_FUNCTION_MODE
692 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
694 #undef TARGET_FUNCTION_VALUE
695 #define TARGET_FUNCTION_VALUE sparc_function_value
696 #undef TARGET_LIBCALL_VALUE
697 #define TARGET_LIBCALL_VALUE sparc_libcall_value
698 #undef TARGET_FUNCTION_VALUE_REGNO_P
699 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
701 #undef TARGET_STRUCT_VALUE_RTX
702 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
703 #undef TARGET_RETURN_IN_MEMORY
704 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
705 #undef TARGET_MUST_PASS_IN_STACK
706 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
707 #undef TARGET_PASS_BY_REFERENCE
708 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
709 #undef TARGET_ARG_PARTIAL_BYTES
710 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
711 #undef TARGET_FUNCTION_ARG_ADVANCE
712 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
713 #undef TARGET_FUNCTION_ARG
714 #define TARGET_FUNCTION_ARG sparc_function_arg
715 #undef TARGET_FUNCTION_INCOMING_ARG
716 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
717 #undef TARGET_FUNCTION_ARG_BOUNDARY
718 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
720 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
721 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
722 #undef TARGET_STRICT_ARGUMENT_NAMING
723 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
725 #undef TARGET_EXPAND_BUILTIN_VA_START
726 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
727 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
728 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
730 #undef TARGET_VECTOR_MODE_SUPPORTED_P
731 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
733 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
734 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
736 #ifdef SUBTARGET_INSERT_ATTRIBUTES
737 #undef TARGET_INSERT_ATTRIBUTES
738 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
741 #ifdef SUBTARGET_ATTRIBUTE_TABLE
742 #undef TARGET_ATTRIBUTE_TABLE
743 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
746 #undef TARGET_RELAXED_ORDERING
747 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
749 #undef TARGET_OPTION_OVERRIDE
750 #define TARGET_OPTION_OVERRIDE sparc_option_override
752 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
753 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
754 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
757 #undef TARGET_ASM_FILE_END
758 #define TARGET_ASM_FILE_END sparc_file_end
760 #undef TARGET_FRAME_POINTER_REQUIRED
761 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
763 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
764 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
766 #undef TARGET_CAN_ELIMINATE
767 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
769 #undef TARGET_PREFERRED_RELOAD_CLASS
770 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
772 #undef TARGET_SECONDARY_RELOAD
773 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
775 #undef TARGET_CONDITIONAL_REGISTER_USAGE
776 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
778 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
779 #undef TARGET_MANGLE_TYPE
780 #define TARGET_MANGLE_TYPE sparc_mangle_type
783 #undef TARGET_LEGITIMATE_ADDRESS_P
784 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
786 #undef TARGET_LEGITIMATE_CONSTANT_P
787 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
789 #undef TARGET_TRAMPOLINE_INIT
790 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
792 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
793 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
794 #undef TARGET_PRINT_OPERAND
795 #define TARGET_PRINT_OPERAND sparc_print_operand
796 #undef TARGET_PRINT_OPERAND_ADDRESS
797 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
799 /* The value stored by LDSTUB. */
800 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
801 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
803 #undef TARGET_CSTORE_MODE
804 #define TARGET_CSTORE_MODE sparc_cstore_mode
806 struct gcc_target targetm = TARGET_INITIALIZER;
808 /* We use a machine specific pass to enable workarounds for errata.
809 We need to have the (essentially) final form of the insn stream in order
810 to properly detect the various hazards. Therefore, this machine specific
811 pass runs as late as possible. The pass is inserted in the pass pipeline
812 at the end of sparc_options_override. */
815 sparc_gate_work_around_errata (void)
817 /* The only erratum we handle for now is that of the AT697F processor. */
818 return sparc_fix_at697f != 0;
822 sparc_do_work_around_errata (void)
826 /* Now look for specific patterns in the insn stream. */
827 for (insn = get_insns (); insn; insn = next)
829 bool insert_nop = false;
832 /* Look for a single-word load into an odd-numbered FP register. */
833 if (NONJUMP_INSN_P (insn)
834 && (set = single_set (insn)) != NULL_RTX
835 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
836 && MEM_P (SET_SRC (set))
837 && REG_P (SET_DEST (set))
838 && REGNO (SET_DEST (set)) > 31
839 && REGNO (SET_DEST (set)) % 2 != 0)
841 /* The wrong dependency is on the enclosing double register. */
842 unsigned int x = REGNO (SET_DEST (set)) - 1;
843 unsigned int src1, src2, dest;
846 /* If the insn has a delay slot, then it cannot be problematic. */
847 next = next_active_insn (insn);
848 if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
853 code = INSN_CODE (next);
858 case CODE_FOR_adddf3:
859 case CODE_FOR_subdf3:
860 case CODE_FOR_muldf3:
861 case CODE_FOR_divdf3:
862 dest = REGNO (recog_data.operand[0]);
863 src1 = REGNO (recog_data.operand[1]);
864 src2 = REGNO (recog_data.operand[2]);
869 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
870 if ((src1 == x || src2 == x)
871 && (dest == src1 || dest == src2))
878 FPOPd %fx, %fx, %fx */
881 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
886 case CODE_FOR_sqrtdf2:
887 dest = REGNO (recog_data.operand[0]);
888 src1 = REGNO (recog_data.operand[1]);
892 if (src1 == x && dest == src1)
901 next = NEXT_INSN (insn);
904 emit_insn_after (gen_nop (), insn);
909 struct rtl_opt_pass pass_work_around_errata =
914 OPTGROUP_NONE, /* optinfo_flags */
915 sparc_gate_work_around_errata, /* gate */
916 sparc_do_work_around_errata, /* execute */
919 0, /* static_pass_number */
920 TV_MACH_DEP, /* tv_id */
921 0, /* properties_required */
922 0, /* properties_provided */
923 0, /* properties_destroyed */
924 0, /* todo_flags_start */
925 TODO_verify_rtl_sharing, /* todo_flags_finish */
929 struct register_pass_info insert_pass_work_around_errata =
931 &pass_work_around_errata.pass, /* pass */
932 "dbr", /* reference_pass_name */
933 1, /* ref_pass_instance_number */
934 PASS_POS_INSERT_AFTER /* po_op */
937 /* Helpers for TARGET_DEBUG_OPTIONS. */
939 dump_target_flag_bits (const int flags)
941 if (flags & MASK_64BIT)
942 fprintf (stderr, "64BIT ");
943 if (flags & MASK_APP_REGS)
944 fprintf (stderr, "APP_REGS ");
945 if (flags & MASK_FASTER_STRUCTS)
946 fprintf (stderr, "FASTER_STRUCTS ");
947 if (flags & MASK_FLAT)
948 fprintf (stderr, "FLAT ");
949 if (flags & MASK_FMAF)
950 fprintf (stderr, "FMAF ");
951 if (flags & MASK_FPU)
952 fprintf (stderr, "FPU ");
953 if (flags & MASK_HARD_QUAD)
954 fprintf (stderr, "HARD_QUAD ");
955 if (flags & MASK_POPC)
956 fprintf (stderr, "POPC ");
957 if (flags & MASK_PTR64)
958 fprintf (stderr, "PTR64 ");
959 if (flags & MASK_STACK_BIAS)
960 fprintf (stderr, "STACK_BIAS ");
961 if (flags & MASK_UNALIGNED_DOUBLES)
962 fprintf (stderr, "UNALIGNED_DOUBLES ");
963 if (flags & MASK_V8PLUS)
964 fprintf (stderr, "V8PLUS ");
965 if (flags & MASK_VIS)
966 fprintf (stderr, "VIS ");
967 if (flags & MASK_VIS2)
968 fprintf (stderr, "VIS2 ");
969 if (flags & MASK_VIS3)
970 fprintf (stderr, "VIS3 ");
971 if (flags & MASK_CBCOND)
972 fprintf (stderr, "CBCOND ");
973 if (flags & MASK_DEPRECATED_V8_INSNS)
974 fprintf (stderr, "DEPRECATED_V8_INSNS ");
975 if (flags & MASK_SPARCLET)
976 fprintf (stderr, "SPARCLET ");
977 if (flags & MASK_SPARCLITE)
978 fprintf (stderr, "SPARCLITE ");
980 fprintf (stderr, "V8 ");
982 fprintf (stderr, "V9 ");
986 dump_target_flags (const char *prefix, const int flags)
988 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
989 dump_target_flag_bits (flags);
990 fprintf(stderr, "]\n");
993 /* Validate and override various options, and do some machine dependent
997 sparc_option_override (void)
999 static struct code_model {
1000 const char *const name;
1001 const enum cmodel value;
1002 } const cmodels[] = {
1004 { "medlow", CM_MEDLOW },
1005 { "medmid", CM_MEDMID },
1006 { "medany", CM_MEDANY },
1007 { "embmedany", CM_EMBMEDANY },
1008 { NULL, (enum cmodel) 0 }
1010 const struct code_model *cmodel;
1011 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1012 static struct cpu_default {
1014 const enum processor_type processor;
1015 } const cpu_default[] = {
1016 /* There must be one entry here for each TARGET_CPU value. */
1017 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1018 { TARGET_CPU_v8, PROCESSOR_V8 },
1019 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1020 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1021 { TARGET_CPU_leon, PROCESSOR_LEON },
1022 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1023 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1024 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1025 { TARGET_CPU_v9, PROCESSOR_V9 },
1026 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1027 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1028 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1029 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1030 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1031 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1032 { -1, PROCESSOR_V7 }
1034 const struct cpu_default *def;
1035 /* Table of values for -m{cpu,tune}=. This must match the order of
1036 the PROCESSOR_* enumeration. */
1037 static struct cpu_table {
1038 const char *const name;
1041 } const cpu_table[] = {
1042 { "v7", MASK_ISA, 0 },
1043 { "cypress", MASK_ISA, 0 },
1044 { "v8", MASK_ISA, MASK_V8 },
1045 /* TI TMS390Z55 supersparc */
1046 { "supersparc", MASK_ISA, MASK_V8 },
1047 { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
1049 { "leon", MASK_ISA, MASK_V8|MASK_FPU },
1050 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1051 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1052 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1053 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1054 { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1055 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1056 { "sparclet", MASK_ISA, MASK_SPARCLET },
1057 /* TEMIC sparclet */
1058 { "tsc701", MASK_ISA, MASK_SPARCLET },
1059 { "v9", MASK_ISA, MASK_V9 },
1060 /* UltraSPARC I, II, IIi */
1061 { "ultrasparc", MASK_ISA,
1062 /* Although insns using %y are deprecated, it is a clear win. */
1063 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1064 /* UltraSPARC III */
1065 /* ??? Check if %y issue still holds true. */
1066 { "ultrasparc3", MASK_ISA,
1067 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1069 { "niagara", MASK_ISA,
1070 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1072 { "niagara2", MASK_ISA,
1073 MASK_V9|MASK_POPC|MASK_VIS2 },
1075 { "niagara3", MASK_ISA,
1076 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
1078 { "niagara4", MASK_ISA,
1079 MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1081 const struct cpu_table *cpu;
1085 if (sparc_debug_string != NULL)
1090 p = ASTRDUP (sparc_debug_string);
1091 while ((q = strtok (p, ",")) != NULL)
1105 if (! strcmp (q, "all"))
1106 mask = MASK_DEBUG_ALL;
1107 else if (! strcmp (q, "options"))
1108 mask = MASK_DEBUG_OPTIONS;
1110 error ("unknown -mdebug-%s switch", q);
1113 sparc_debug &= ~mask;
1115 sparc_debug |= mask;
1119 if (TARGET_DEBUG_OPTIONS)
1121 dump_target_flags("Initial target_flags", target_flags);
1122 dump_target_flags("target_flags_explicit", target_flags_explicit);
1125 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1126 SUBTARGET_OVERRIDE_OPTIONS;
1129 #ifndef SPARC_BI_ARCH
1130 /* Check for unsupported architecture size. */
1131 if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1132 error ("%s is not supported by this configuration",
1133 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1136 /* We force all 64bit archs to use 128 bit long double */
1137 if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1139 error ("-mlong-double-64 not allowed with -m64");
1140 target_flags |= MASK_LONG_DOUBLE_128;
1143 /* Code model selection. */
1144 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1146 #ifdef SPARC_BI_ARCH
1148 sparc_cmodel = CM_32;
1151 if (sparc_cmodel_string != NULL)
1155 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1156 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1158 if (cmodel->name == NULL)
1159 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1161 sparc_cmodel = cmodel->value;
1164 error ("-mcmodel= is not supported on 32 bit systems");
1167 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1168 for (i = 8; i < 16; i++)
1169 if (!call_used_regs [i])
1171 error ("-fcall-saved-REG is not supported for out registers");
1172 call_used_regs [i] = 1;
1175 fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1177 /* Set the default CPU. */
1178 if (!global_options_set.x_sparc_cpu_and_features)
1180 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1181 if (def->cpu == TARGET_CPU_DEFAULT)
1183 gcc_assert (def->cpu != -1);
1184 sparc_cpu_and_features = def->processor;
1187 if (!global_options_set.x_sparc_cpu)
1188 sparc_cpu = sparc_cpu_and_features;
1190 cpu = &cpu_table[(int) sparc_cpu_and_features];
1192 if (TARGET_DEBUG_OPTIONS)
1194 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1195 fprintf (stderr, "sparc_cpu: %s\n",
1196 cpu_table[(int) sparc_cpu].name);
1197 dump_target_flags ("cpu->disable", cpu->disable);
1198 dump_target_flags ("cpu->enable", cpu->enable);
1201 target_flags &= ~cpu->disable;
1202 target_flags |= (cpu->enable
1203 #ifndef HAVE_AS_FMAF_HPC_VIS3
1204 & ~(MASK_FMAF | MASK_VIS3)
1206 #ifndef HAVE_AS_SPARC4
1211 /* If -mfpu or -mno-fpu was explicitly used, don't override with
1212 the processor default. */
1213 if (target_flags_explicit & MASK_FPU)
1214 target_flags = (target_flags & ~MASK_FPU) | fpu;
1216 /* -mvis2 implies -mvis */
1218 target_flags |= MASK_VIS;
1220 /* -mvis3 implies -mvis2 and -mvis */
1222 target_flags |= MASK_VIS2 | MASK_VIS;
1224 /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
1227 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
1229 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1231 -m64 also implies v9. */
1232 if (TARGET_VIS || TARGET_ARCH64)
1234 target_flags |= MASK_V9;
1235 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1238 /* -mvis also implies -mv8plus on 32-bit */
1239 if (TARGET_VIS && ! TARGET_ARCH64)
1240 target_flags |= MASK_V8PLUS;
1242 /* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
1243 if (TARGET_V9 && TARGET_ARCH32)
1244 target_flags |= MASK_DEPRECATED_V8_INSNS;
1246 /* V8PLUS requires V9, makes no sense in 64 bit mode. */
1247 if (! TARGET_V9 || TARGET_ARCH64)
1248 target_flags &= ~MASK_V8PLUS;
1250 /* Don't use stack biasing in 32 bit mode. */
1252 target_flags &= ~MASK_STACK_BIAS;
1254 /* Supply a default value for align_functions. */
1255 if (align_functions == 0
1256 && (sparc_cpu == PROCESSOR_ULTRASPARC
1257 || sparc_cpu == PROCESSOR_ULTRASPARC3
1258 || sparc_cpu == PROCESSOR_NIAGARA
1259 || sparc_cpu == PROCESSOR_NIAGARA2
1260 || sparc_cpu == PROCESSOR_NIAGARA3
1261 || sparc_cpu == PROCESSOR_NIAGARA4))
1262 align_functions = 32;
1264 /* Validate PCC_STRUCT_RETURN. */
1265 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1266 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1268 /* Only use .uaxword when compiling for a 64-bit target. */
1270 targetm.asm_out.unaligned_op.di = NULL;
1272 /* Do various machine dependent initializations. */
1273 sparc_init_modes ();
1275 /* Set up function hooks. */
1276 init_machine_status = sparc_init_machine_status;
1281 case PROCESSOR_CYPRESS:
1282 sparc_costs = &cypress_costs;
1285 case PROCESSOR_SPARCLITE:
1286 case PROCESSOR_SUPERSPARC:
1287 sparc_costs = &supersparc_costs;
1289 case PROCESSOR_F930:
1290 case PROCESSOR_F934:
1291 case PROCESSOR_HYPERSPARC:
1292 case PROCESSOR_SPARCLITE86X:
1293 sparc_costs = &hypersparc_costs;
1295 case PROCESSOR_LEON:
1296 sparc_costs = &leon_costs;
1298 case PROCESSOR_SPARCLET:
1299 case PROCESSOR_TSC701:
1300 sparc_costs = &sparclet_costs;
1303 case PROCESSOR_ULTRASPARC:
1304 sparc_costs = &ultrasparc_costs;
1306 case PROCESSOR_ULTRASPARC3:
1307 sparc_costs = &ultrasparc3_costs;
1309 case PROCESSOR_NIAGARA:
1310 sparc_costs = &niagara_costs;
1312 case PROCESSOR_NIAGARA2:
1313 sparc_costs = &niagara2_costs;
1315 case PROCESSOR_NIAGARA3:
1316 sparc_costs = &niagara3_costs;
1318 case PROCESSOR_NIAGARA4:
1319 sparc_costs = &niagara4_costs;
1321 case PROCESSOR_NATIVE:
1325 if (sparc_memory_model == SMM_DEFAULT)
1327 /* Choose the memory model for the operating system. */
1328 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1329 if (os_default != SMM_DEFAULT)
1330 sparc_memory_model = os_default;
1331 /* Choose the most relaxed model for the processor. */
1333 sparc_memory_model = SMM_RMO;
1335 sparc_memory_model = SMM_PSO;
1337 sparc_memory_model = SMM_SC;
1340 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1341 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1342 target_flags |= MASK_LONG_DOUBLE_128;
1345 if (TARGET_DEBUG_OPTIONS)
1346 dump_target_flags ("Final target_flags", target_flags);
1348 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1349 ((sparc_cpu == PROCESSOR_ULTRASPARC
1350 || sparc_cpu == PROCESSOR_NIAGARA
1351 || sparc_cpu == PROCESSOR_NIAGARA2
1352 || sparc_cpu == PROCESSOR_NIAGARA3
1353 || sparc_cpu == PROCESSOR_NIAGARA4)
1355 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1357 global_options.x_param_values,
1358 global_options_set.x_param_values);
1359 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1360 ((sparc_cpu == PROCESSOR_ULTRASPARC
1361 || sparc_cpu == PROCESSOR_ULTRASPARC3
1362 || sparc_cpu == PROCESSOR_NIAGARA
1363 || sparc_cpu == PROCESSOR_NIAGARA2
1364 || sparc_cpu == PROCESSOR_NIAGARA3
1365 || sparc_cpu == PROCESSOR_NIAGARA4)
1367 global_options.x_param_values,
1368 global_options_set.x_param_values);
1370 /* Disable save slot sharing for call-clobbered registers by default.
1371 The IRA sharing algorithm works on single registers only and this
1372 pessimizes for double floating-point registers. */
1373 if (!global_options_set.x_flag_ira_share_save_slots)
1374 flag_ira_share_save_slots = 0;
1376 /* We register a machine specific pass to work around errata, if any.
1377 The pass mut be scheduled as late as possible so that we have the
1378 (essentially) final form of the insn stream to work on.
1379 Registering the pass must be done at start up. It's convenient to
1381 register_pass (&insert_pass_work_around_errata);
1384 /* Miscellaneous utilities. */
1386 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1387 or branch on register contents instructions. */
1390 v9_regcmp_p (enum rtx_code code)
1392 return (code == EQ || code == NE || code == GE || code == LT
1393 || code == LE || code == GT);
1396 /* Nonzero if OP is a floating point constant which can
1397 be loaded into an integer register using a single
1398 sethi instruction. */
1403 if (GET_CODE (op) == CONST_DOUBLE)
1408 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1409 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1410 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1416 /* Nonzero if OP is a floating point constant which can
1417 be loaded into an integer register using a single
1423 if (GET_CODE (op) == CONST_DOUBLE)
1428 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1429 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1430 return SPARC_SIMM13_P (i);
1436 /* Nonzero if OP is a floating point constant which can
1437 be loaded into an integer register using a high/losum
1438 instruction sequence. */
1441 fp_high_losum_p (rtx op)
1443 /* The constraints calling this should only be in
1444 SFmode move insns, so any constant which cannot
1445 be moved using a single insn will do. */
1446 if (GET_CODE (op) == CONST_DOUBLE)
1451 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1452 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1453 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1459 /* Return true if the address of LABEL can be loaded by means of the
1460 mov{si,di}_pic_label_ref patterns in PIC mode. */
1463 can_use_mov_pic_label_ref (rtx label)
1465 /* VxWorks does not impose a fixed gap between segments; the run-time
1466 gap can be different from the object-file gap. We therefore can't
1467 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1468 are absolutely sure that X is in the same segment as the GOT.
1469 Unfortunately, the flexibility of linker scripts means that we
1470 can't be sure of that in general, so assume that GOT-relative
1471 accesses are never valid on VxWorks. */
1472 if (TARGET_VXWORKS_RTP)
1475 /* Similarly, if the label is non-local, it might end up being placed
1476 in a different section than the current one; now mov_pic_label_ref
1477 requires the label and the code to be in the same section. */
1478 if (LABEL_REF_NONLOCAL_P (label))
1481 /* Finally, if we are reordering basic blocks and partition into hot
1482 and cold sections, this might happen for any label. */
1483 if (flag_reorder_blocks_and_partition)
1489 /* Expand a move instruction. Return true if all work is done. */
1492 sparc_expand_move (enum machine_mode mode, rtx *operands)
1494 /* Handle sets of MEM first. */
1495 if (GET_CODE (operands[0]) == MEM)
1497 /* 0 is a register (or a pair of registers) on SPARC. */
1498 if (register_or_zero_operand (operands[1], mode))
1501 if (!reload_in_progress)
1503 operands[0] = validize_mem (operands[0]);
1504 operands[1] = force_reg (mode, operands[1]);
1508 /* Fixup TLS cases. */
1510 && CONSTANT_P (operands[1])
1511 && sparc_tls_referenced_p (operands [1]))
1513 operands[1] = sparc_legitimize_tls_address (operands[1]);
1517 /* Fixup PIC cases. */
1518 if (flag_pic && CONSTANT_P (operands[1]))
1520 if (pic_address_needs_scratch (operands[1]))
1521 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1523 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1524 if (GET_CODE (operands[1]) == LABEL_REF
1525 && can_use_mov_pic_label_ref (operands[1]))
1529 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1535 gcc_assert (TARGET_ARCH64);
1536 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1541 if (symbolic_operand (operands[1], mode))
1544 = sparc_legitimize_pic_address (operands[1],
1546 ? operands[0] : NULL_RTX);
1551 /* If we are trying to toss an integer constant into FP registers,
1552 or loading a FP or vector constant, force it into memory. */
1553 if (CONSTANT_P (operands[1])
1554 && REG_P (operands[0])
1555 && (SPARC_FP_REG_P (REGNO (operands[0]))
1556 || SCALAR_FLOAT_MODE_P (mode)
1557 || VECTOR_MODE_P (mode)))
1559 /* emit_group_store will send such bogosity to us when it is
1560 not storing directly into memory. So fix this up to avoid
1561 crashes in output_constant_pool. */
1562 if (operands [1] == const0_rtx)
1563 operands[1] = CONST0_RTX (mode);
1565 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1566 always other regs. */
1567 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1568 && (const_zero_operand (operands[1], mode)
1569 || const_all_ones_operand (operands[1], mode)))
1572 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1573 /* We are able to build any SF constant in integer registers
1574 with at most 2 instructions. */
1576 /* And any DF constant in integer registers. */
1578 && ! can_create_pseudo_p ())))
1581 operands[1] = force_const_mem (mode, operands[1]);
1582 if (!reload_in_progress)
1583 operands[1] = validize_mem (operands[1]);
1587 /* Accept non-constants and valid constants unmodified. */
1588 if (!CONSTANT_P (operands[1])
1589 || GET_CODE (operands[1]) == HIGH
1590 || input_operand (operands[1], mode))
1596 /* All QImode constants require only one insn, so proceed. */
1601 sparc_emit_set_const32 (operands[0], operands[1]);
1605 /* input_operand should have filtered out 32-bit mode. */
1606 sparc_emit_set_const64 (operands[0], operands[1]);
1612 /* TImode isn't available in 32-bit mode. */
1613 split_double (operands[1], &high, &low);
1614 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1616 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1628 /* Load OP1, a 32-bit constant, into OP0, a register.
1629 We know it can't be done in one insn when we get
1630 here, the move expander guarantees this. */
1633 sparc_emit_set_const32 (rtx op0, rtx op1)
1635 enum machine_mode mode = GET_MODE (op0);
1638 if (can_create_pseudo_p ())
1639 temp = gen_reg_rtx (mode);
1641 if (GET_CODE (op1) == CONST_INT)
1643 gcc_assert (!small_int_operand (op1, mode)
1644 && !const_high_operand (op1, mode));
1646 /* Emit them as real moves instead of a HIGH/LO_SUM,
1647 this way CSE can see everything and reuse intermediate
1648 values if it wants. */
1649 emit_insn (gen_rtx_SET (VOIDmode, temp,
1650 GEN_INT (INTVAL (op1)
1651 & ~(HOST_WIDE_INT)0x3ff)));
1653 emit_insn (gen_rtx_SET (VOIDmode,
1655 gen_rtx_IOR (mode, temp,
1656 GEN_INT (INTVAL (op1) & 0x3ff))));
1660 /* A symbol, emit in the traditional way. */
1661 emit_insn (gen_rtx_SET (VOIDmode, temp,
1662 gen_rtx_HIGH (mode, op1)));
1663 emit_insn (gen_rtx_SET (VOIDmode,
1664 op0, gen_rtx_LO_SUM (mode, temp, op1)));
1668 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1669 If TEMP is nonzero, we are forbidden to use any other scratch
1670 registers. Otherwise, we are allowed to generate them as needed.
1672 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1673 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
1676 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1678 rtx temp1, temp2, temp3, temp4, temp5;
1681 if (temp && GET_MODE (temp) == TImode)
1684 temp = gen_rtx_REG (DImode, REGNO (temp));
1687 /* SPARC-V9 code-model support. */
1688 switch (sparc_cmodel)
1691 /* The range spanned by all instructions in the object is less
1692 than 2^31 bytes (2GB) and the distance from any instruction
1693 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1694 than 2^31 bytes (2GB).
1696 The executable must be in the low 4TB of the virtual address
1699 sethi %hi(symbol), %temp1
1700 or %temp1, %lo(symbol), %reg */
1702 temp1 = temp; /* op0 is allowed. */
1704 temp1 = gen_reg_rtx (DImode);
1706 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1707 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1711 /* The range spanned by all instructions in the object is less
1712 than 2^31 bytes (2GB) and the distance from any instruction
1713 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1714 than 2^31 bytes (2GB).
1716 The executable must be in the low 16TB of the virtual address
1719 sethi %h44(symbol), %temp1
1720 or %temp1, %m44(symbol), %temp2
1721 sllx %temp2, 12, %temp3
1722 or %temp3, %l44(symbol), %reg */
1727 temp3 = temp; /* op0 is allowed. */
1731 temp1 = gen_reg_rtx (DImode);
1732 temp2 = gen_reg_rtx (DImode);
1733 temp3 = gen_reg_rtx (DImode);
1736 emit_insn (gen_seth44 (temp1, op1));
1737 emit_insn (gen_setm44 (temp2, temp1, op1));
1738 emit_insn (gen_rtx_SET (VOIDmode, temp3,
1739 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
1740 emit_insn (gen_setl44 (op0, temp3, op1));
1744 /* The range spanned by all instructions in the object is less
1745 than 2^31 bytes (2GB) and the distance from any instruction
1746 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1747 than 2^31 bytes (2GB).
1749 The executable can be placed anywhere in the virtual address
1752 sethi %hh(symbol), %temp1
1753 sethi %lm(symbol), %temp2
1754 or %temp1, %hm(symbol), %temp3
1755 sllx %temp3, 32, %temp4
1756 or %temp4, %temp2, %temp5
1757 or %temp5, %lo(symbol), %reg */
1760 /* It is possible that one of the registers we got for operands[2]
1761 might coincide with that of operands[0] (which is why we made
1762 it TImode). Pick the other one to use as our scratch. */
1763 if (rtx_equal_p (temp, op0))
1765 gcc_assert (ti_temp);
1766 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1769 temp2 = temp; /* op0 is _not_ allowed, see above. */
1776 temp1 = gen_reg_rtx (DImode);
1777 temp2 = gen_reg_rtx (DImode);
1778 temp3 = gen_reg_rtx (DImode);
1779 temp4 = gen_reg_rtx (DImode);
1780 temp5 = gen_reg_rtx (DImode);
1783 emit_insn (gen_sethh (temp1, op1));
1784 emit_insn (gen_setlm (temp2, op1));
1785 emit_insn (gen_sethm (temp3, temp1, op1));
1786 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1787 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1788 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1789 gen_rtx_PLUS (DImode, temp4, temp2)));
1790 emit_insn (gen_setlo (op0, temp5, op1));
1794 /* Old old old backwards compatibility kruft here.
1795 Essentially it is MEDLOW with a fixed 64-bit
1796 virtual base added to all data segment addresses.
1797 Text-segment stuff is computed like MEDANY, we can't
1798 reuse the code above because the relocation knobs
1801 Data segment: sethi %hi(symbol), %temp1
1802 add %temp1, EMBMEDANY_BASE_REG, %temp2
1803 or %temp2, %lo(symbol), %reg */
1804 if (data_segment_operand (op1, GET_MODE (op1)))
1808 temp1 = temp; /* op0 is allowed. */
1813 temp1 = gen_reg_rtx (DImode);
1814 temp2 = gen_reg_rtx (DImode);
1817 emit_insn (gen_embmedany_sethi (temp1, op1));
1818 emit_insn (gen_embmedany_brsum (temp2, temp1));
1819 emit_insn (gen_embmedany_losum (op0, temp2, op1));
1822 /* Text segment: sethi %uhi(symbol), %temp1
1823 sethi %hi(symbol), %temp2
1824 or %temp1, %ulo(symbol), %temp3
1825 sllx %temp3, 32, %temp4
1826 or %temp4, %temp2, %temp5
1827 or %temp5, %lo(symbol), %reg */
1832 /* It is possible that one of the registers we got for operands[2]
1833 might coincide with that of operands[0] (which is why we made
1834 it TImode). Pick the other one to use as our scratch. */
1835 if (rtx_equal_p (temp, op0))
1837 gcc_assert (ti_temp);
1838 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
1841 temp2 = temp; /* op0 is _not_ allowed, see above. */
1848 temp1 = gen_reg_rtx (DImode);
1849 temp2 = gen_reg_rtx (DImode);
1850 temp3 = gen_reg_rtx (DImode);
1851 temp4 = gen_reg_rtx (DImode);
1852 temp5 = gen_reg_rtx (DImode);
1855 emit_insn (gen_embmedany_textuhi (temp1, op1));
1856 emit_insn (gen_embmedany_texthi (temp2, op1));
1857 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
1858 emit_insn (gen_rtx_SET (VOIDmode, temp4,
1859 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
1860 emit_insn (gen_rtx_SET (VOIDmode, temp5,
1861 gen_rtx_PLUS (DImode, temp4, temp2)));
1862 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
1871 #if HOST_BITS_PER_WIDE_INT == 32
1873 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
1878 /* These avoid problems when cross compiling. If we do not
1879 go through all this hair then the optimizer will see
1880 invalid REG_EQUAL notes or in some cases none at all. */
1881 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
1882 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
1883 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
1884 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
1886 /* The optimizer is not to assume anything about exactly
1887 which bits are set for a HIGH, they are unspecified.
1888 Unfortunately this leads to many missed optimizations
1889 during CSE. We mask out the non-HIGH bits, and matches
1890 a plain movdi, to alleviate this problem. */
1892 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
1894 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
1898 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
1900 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
1904 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
1906 return gen_rtx_IOR (DImode, src, GEN_INT (val));
1910 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
1912 return gen_rtx_XOR (DImode, src, GEN_INT (val));
1915 /* Worker routines for 64-bit constant formation on arch64.
1916 One of the key things to be doing in these emissions is
1917 to create as many temp REGs as possible. This makes it
1918 possible for half-built constants to be used later when
1919 such values are similar to something required later on.
1920 Without doing this, the optimizer cannot see such
1923 static void sparc_emit_set_const64_quick1 (rtx, rtx,
1924 unsigned HOST_WIDE_INT, int);
1927 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
1928 unsigned HOST_WIDE_INT low_bits, int is_neg)
1930 unsigned HOST_WIDE_INT high_bits;
1933 high_bits = (~low_bits) & 0xffffffff;
1935 high_bits = low_bits;
1937 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1940 emit_insn (gen_rtx_SET (VOIDmode, op0,
1941 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1945 /* If we are XOR'ing with -1, then we should emit a one's complement
1946 instead. This way the combiner will notice logical operations
1947 such as ANDN later on and substitute. */
1948 if ((low_bits & 0x3ff) == 0x3ff)
1950 emit_insn (gen_rtx_SET (VOIDmode, op0,
1951 gen_rtx_NOT (DImode, temp)));
1955 emit_insn (gen_rtx_SET (VOIDmode, op0,
1956 gen_safe_XOR64 (temp,
1957 (-(HOST_WIDE_INT)0x400
1958 | (low_bits & 0x3ff)))));
1963 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
1964 unsigned HOST_WIDE_INT, int);
1967 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
1968 unsigned HOST_WIDE_INT high_bits,
1969 unsigned HOST_WIDE_INT low_immediate,
1974 if ((high_bits & 0xfffffc00) != 0)
1976 emit_insn (gen_safe_HIGH64 (temp, high_bits));
1977 if ((high_bits & ~0xfffffc00) != 0)
1978 emit_insn (gen_rtx_SET (VOIDmode, op0,
1979 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
1985 emit_insn (gen_safe_SET64 (temp, high_bits));
1989 /* Now shift it up into place. */
1990 emit_insn (gen_rtx_SET (VOIDmode, op0,
1991 gen_rtx_ASHIFT (DImode, temp2,
1992 GEN_INT (shift_count))));
1994 /* If there is a low immediate part piece, finish up by
1995 putting that in as well. */
1996 if (low_immediate != 0)
1997 emit_insn (gen_rtx_SET (VOIDmode, op0,
1998 gen_safe_OR64 (op0, low_immediate)));
2001 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2002 unsigned HOST_WIDE_INT);
2004 /* Full 64-bit constant decomposition. Even though this is the
2005 'worst' case, we still optimize a few things away. */
2007 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2008 unsigned HOST_WIDE_INT high_bits,
2009 unsigned HOST_WIDE_INT low_bits)
2013 if (can_create_pseudo_p ())
2014 sub_temp = gen_reg_rtx (DImode);
2016 if ((high_bits & 0xfffffc00) != 0)
2018 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2019 if ((high_bits & ~0xfffffc00) != 0)
2020 emit_insn (gen_rtx_SET (VOIDmode,
2022 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2028 emit_insn (gen_safe_SET64 (temp, high_bits));
2032 if (can_create_pseudo_p ())
2034 rtx temp2 = gen_reg_rtx (DImode);
2035 rtx temp3 = gen_reg_rtx (DImode);
2036 rtx temp4 = gen_reg_rtx (DImode);
2038 emit_insn (gen_rtx_SET (VOIDmode, temp4,
2039 gen_rtx_ASHIFT (DImode, sub_temp,
2042 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2043 if ((low_bits & ~0xfffffc00) != 0)
2045 emit_insn (gen_rtx_SET (VOIDmode, temp3,
2046 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2047 emit_insn (gen_rtx_SET (VOIDmode, op0,
2048 gen_rtx_PLUS (DImode, temp4, temp3)));
2052 emit_insn (gen_rtx_SET (VOIDmode, op0,
2053 gen_rtx_PLUS (DImode, temp4, temp2)));
2058 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2059 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2060 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2063 /* We are in the middle of reload, so this is really
2064 painful. However we do still make an attempt to
2065 avoid emitting truly stupid code. */
2066 if (low1 != const0_rtx)
2068 emit_insn (gen_rtx_SET (VOIDmode, op0,
2069 gen_rtx_ASHIFT (DImode, sub_temp,
2070 GEN_INT (to_shift))));
2071 emit_insn (gen_rtx_SET (VOIDmode, op0,
2072 gen_rtx_IOR (DImode, op0, low1)));
2080 if (low2 != const0_rtx)
2082 emit_insn (gen_rtx_SET (VOIDmode, op0,
2083 gen_rtx_ASHIFT (DImode, sub_temp,
2084 GEN_INT (to_shift))));
2085 emit_insn (gen_rtx_SET (VOIDmode, op0,
2086 gen_rtx_IOR (DImode, op0, low2)));
2094 emit_insn (gen_rtx_SET (VOIDmode, op0,
2095 gen_rtx_ASHIFT (DImode, sub_temp,
2096 GEN_INT (to_shift))));
2097 if (low3 != const0_rtx)
2098 emit_insn (gen_rtx_SET (VOIDmode, op0,
2099 gen_rtx_IOR (DImode, op0, low3)));
2104 /* Analyze a 64-bit constant for certain properties. */
2105 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2106 unsigned HOST_WIDE_INT,
2107 int *, int *, int *);
2110 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2111 unsigned HOST_WIDE_INT low_bits,
2112 int *hbsp, int *lbsp, int *abbasp)
2114 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2117 lowest_bit_set = highest_bit_set = -1;
2121 if ((lowest_bit_set == -1)
2122 && ((low_bits >> i) & 1))
2124 if ((highest_bit_set == -1)
2125 && ((high_bits >> (32 - i - 1)) & 1))
2126 highest_bit_set = (64 - i - 1);
2129 && ((highest_bit_set == -1)
2130 || (lowest_bit_set == -1)));
2136 if ((lowest_bit_set == -1)
2137 && ((high_bits >> i) & 1))
2138 lowest_bit_set = i + 32;
2139 if ((highest_bit_set == -1)
2140 && ((low_bits >> (32 - i - 1)) & 1))
2141 highest_bit_set = 32 - i - 1;
2144 && ((highest_bit_set == -1)
2145 || (lowest_bit_set == -1)));
2147 /* If there are no bits set this should have gone out
2148 as one instruction! */
2149 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2150 all_bits_between_are_set = 1;
2151 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2155 if ((low_bits & (1 << i)) != 0)
2160 if ((high_bits & (1 << (i - 32))) != 0)
2163 all_bits_between_are_set = 0;
2166 *hbsp = highest_bit_set;
2167 *lbsp = lowest_bit_set;
2168 *abbasp = all_bits_between_are_set;
2171 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2174 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2175 unsigned HOST_WIDE_INT low_bits)
2177 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2180 || high_bits == 0xffffffff)
2183 analyze_64bit_constant (high_bits, low_bits,
2184 &highest_bit_set, &lowest_bit_set,
2185 &all_bits_between_are_set);
2187 if ((highest_bit_set == 63
2188 || lowest_bit_set == 0)
2189 && all_bits_between_are_set != 0)
2192 if ((highest_bit_set - lowest_bit_set) < 21)
2198 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2199 unsigned HOST_WIDE_INT,
2202 static unsigned HOST_WIDE_INT
2203 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2204 unsigned HOST_WIDE_INT low_bits,
2205 int lowest_bit_set, int shift)
2207 HOST_WIDE_INT hi, lo;
2209 if (lowest_bit_set < 32)
2211 lo = (low_bits >> lowest_bit_set) << shift;
2212 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2217 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2219 gcc_assert (! (hi & lo));
2223 /* Here we are sure to be arch64 and this is an integer constant
2224 being loaded into a register. Emit the most efficient
2225 insn sequence possible. Detection of all the 1-insn cases
2226 has been done already. */
2228 sparc_emit_set_const64 (rtx op0, rtx op1)
2230 unsigned HOST_WIDE_INT high_bits, low_bits;
2231 int lowest_bit_set, highest_bit_set;
2232 int all_bits_between_are_set;
2235 /* Sanity check that we know what we are working with. */
2236 gcc_assert (TARGET_ARCH64
2237 && (GET_CODE (op0) == SUBREG
2238 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2240 if (! can_create_pseudo_p ())
2243 if (GET_CODE (op1) != CONST_INT)
2245 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2250 temp = gen_reg_rtx (DImode);
2252 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2253 low_bits = (INTVAL (op1) & 0xffffffff);
2255 /* low_bits bits 0 --> 31
2256 high_bits bits 32 --> 63 */
2258 analyze_64bit_constant (high_bits, low_bits,
2259 &highest_bit_set, &lowest_bit_set,
2260 &all_bits_between_are_set);
2262 /* First try for a 2-insn sequence. */
2264 /* These situations are preferred because the optimizer can
2265 * do more things with them:
2267 * sllx %reg, shift, %reg
2269 * srlx %reg, shift, %reg
2270 * 3) mov some_small_const, %reg
2271 * sllx %reg, shift, %reg
2273 if (((highest_bit_set == 63
2274 || lowest_bit_set == 0)
2275 && all_bits_between_are_set != 0)
2276 || ((highest_bit_set - lowest_bit_set) < 12))
2278 HOST_WIDE_INT the_const = -1;
2279 int shift = lowest_bit_set;
2281 if ((highest_bit_set != 63
2282 && lowest_bit_set != 0)
2283 || all_bits_between_are_set == 0)
2286 create_simple_focus_bits (high_bits, low_bits,
2289 else if (lowest_bit_set == 0)
2290 shift = -(63 - highest_bit_set);
2292 gcc_assert (SPARC_SIMM13_P (the_const));
2293 gcc_assert (shift != 0);
2295 emit_insn (gen_safe_SET64 (temp, the_const));
2297 emit_insn (gen_rtx_SET (VOIDmode,
2299 gen_rtx_ASHIFT (DImode,
2303 emit_insn (gen_rtx_SET (VOIDmode,
2305 gen_rtx_LSHIFTRT (DImode,
2307 GEN_INT (-shift))));
2311 /* Now a range of 22 or less bits set somewhere.
2312 * 1) sethi %hi(focus_bits), %reg
2313 * sllx %reg, shift, %reg
2314 * 2) sethi %hi(focus_bits), %reg
2315 * srlx %reg, shift, %reg
2317 if ((highest_bit_set - lowest_bit_set) < 21)
2319 unsigned HOST_WIDE_INT focus_bits =
2320 create_simple_focus_bits (high_bits, low_bits,
2321 lowest_bit_set, 10);
2323 gcc_assert (SPARC_SETHI_P (focus_bits));
2324 gcc_assert (lowest_bit_set != 10);
2326 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2328 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2329 if (lowest_bit_set < 10)
2330 emit_insn (gen_rtx_SET (VOIDmode,
2332 gen_rtx_LSHIFTRT (DImode, temp,
2333 GEN_INT (10 - lowest_bit_set))));
2334 else if (lowest_bit_set > 10)
2335 emit_insn (gen_rtx_SET (VOIDmode,
2337 gen_rtx_ASHIFT (DImode, temp,
2338 GEN_INT (lowest_bit_set - 10))));
2342 /* 1) sethi %hi(low_bits), %reg
2343 * or %reg, %lo(low_bits), %reg
2344 * 2) sethi %hi(~low_bits), %reg
2345 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2348 || high_bits == 0xffffffff)
2350 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2351 (high_bits == 0xffffffff));
2355 /* Now, try 3-insn sequences. */
2357 /* 1) sethi %hi(high_bits), %reg
2358 * or %reg, %lo(high_bits), %reg
2359 * sllx %reg, 32, %reg
2363 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2367 /* We may be able to do something quick
2368 when the constant is negated, so try that. */
2369 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2370 (~low_bits) & 0xfffffc00))
2372 /* NOTE: The trailing bits get XOR'd so we need the
2373 non-negated bits, not the negated ones. */
2374 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2376 if ((((~high_bits) & 0xffffffff) == 0
2377 && ((~low_bits) & 0x80000000) == 0)
2378 || (((~high_bits) & 0xffffffff) == 0xffffffff
2379 && ((~low_bits) & 0x80000000) != 0))
2381 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2383 if ((SPARC_SETHI_P (fast_int)
2384 && (~high_bits & 0xffffffff) == 0)
2385 || SPARC_SIMM13_P (fast_int))
2386 emit_insn (gen_safe_SET64 (temp, fast_int));
2388 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2393 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2394 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2395 sparc_emit_set_const64 (temp, negated_const);
2398 /* If we are XOR'ing with -1, then we should emit a one's complement
2399 instead. This way the combiner will notice logical operations
2400 such as ANDN later on and substitute. */
2401 if (trailing_bits == 0x3ff)
2403 emit_insn (gen_rtx_SET (VOIDmode, op0,
2404 gen_rtx_NOT (DImode, temp)));
2408 emit_insn (gen_rtx_SET (VOIDmode,
2410 gen_safe_XOR64 (temp,
2411 (-0x400 | trailing_bits))));
2416 /* 1) sethi %hi(xxx), %reg
2417 * or %reg, %lo(xxx), %reg
2418 * sllx %reg, yyy, %reg
2420 * ??? This is just a generalized version of the low_bits==0
2421 * thing above, FIXME...
2423 if ((highest_bit_set - lowest_bit_set) < 32)
2425 unsigned HOST_WIDE_INT focus_bits =
2426 create_simple_focus_bits (high_bits, low_bits,
2429 /* We can't get here in this state. */
2430 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2432 /* So what we know is that the set bits straddle the
2433 middle of the 64-bit word. */
2434 sparc_emit_set_const64_quick2 (op0, temp,
2440 /* 1) sethi %hi(high_bits), %reg
2441 * or %reg, %lo(high_bits), %reg
2442 * sllx %reg, 32, %reg
2443 * or %reg, low_bits, %reg
2445 if (SPARC_SIMM13_P(low_bits)
2446 && ((int)low_bits > 0))
2448 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2452 /* The easiest way when all else fails, is full decomposition. */
2453 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2455 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2457 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2458 return the mode to be used for the comparison. For floating-point,
2459 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2460 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2461 processing is needed. */
2464 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2466 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2492 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2493 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2495 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2496 return CCX_NOOVmode;
2502 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2509 /* Emit the compare insn and return the CC reg for a CODE comparison
2510 with operands X and Y. */
2513 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2515 enum machine_mode mode;
2518 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2521 mode = SELECT_CC_MODE (code, x, y);
2523 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2524 fcc regs (cse can't tell they're really call clobbered regs and will
2525 remove a duplicate comparison even if there is an intervening function
2526 call - it will then try to reload the cc reg via an int reg which is why
2527 we need the movcc patterns). It is possible to provide the movcc
2528 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
2529 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
2530 to tell cse that CCFPE mode registers (even pseudos) are call
2533 /* ??? This is an experiment. Rather than making changes to cse which may
2534 or may not be easy/clean, we do our own cse. This is possible because
2535 we will generate hard registers. Cse knows they're call clobbered (it
2536 doesn't know the same thing about pseudos). If we guess wrong, no big
2537 deal, but if we win, great! */
2539 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2540 #if 1 /* experiment */
2543 /* We cycle through the registers to ensure they're all exercised. */
2544 static int next_fcc_reg = 0;
2545 /* Previous x,y for each fcc reg. */
2546 static rtx prev_args[4][2];
2548 /* Scan prev_args for x,y. */
2549 for (reg = 0; reg < 4; reg++)
2550 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2555 prev_args[reg][0] = x;
2556 prev_args[reg][1] = y;
2557 next_fcc_reg = (next_fcc_reg + 1) & 3;
2559 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2562 cc_reg = gen_reg_rtx (mode);
2563 #endif /* ! experiment */
2564 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2565 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2567 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2569 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
2570 will only result in an unrecognizable insn so no point in asserting. */
2571 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2577 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2580 gen_compare_reg (rtx cmp)
2582 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2585 /* This function is used for v9 only.
2586 DEST is the target of the Scc insn.
2587 CODE is the code for an Scc's comparison.
2588 X and Y are the values we compare.
2590 This function is needed to turn
2593 (gt (reg:CCX 100 %icc)
2597 (gt:DI (reg:CCX 100 %icc)
2600 IE: The instruction recognizer needs to see the mode of the comparison to
2601 find the right instruction. We could use "gt:DI" right in the
2602 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2605 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2608 && (GET_MODE (x) == DImode
2609 || GET_MODE (dest) == DImode))
2612 /* Try to use the movrCC insns. */
2614 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2616 && v9_regcmp_p (compare_code))
2621 /* Special case for op0 != 0. This can be done with one instruction if
2624 if (compare_code == NE
2625 && GET_MODE (dest) == DImode
2626 && rtx_equal_p (op0, dest))
2628 emit_insn (gen_rtx_SET (VOIDmode, dest,
2629 gen_rtx_IF_THEN_ELSE (DImode,
2630 gen_rtx_fmt_ee (compare_code, DImode,
2637 if (reg_overlap_mentioned_p (dest, op0))
2639 /* Handle the case where dest == x.
2640 We "early clobber" the result. */
2641 op0 = gen_reg_rtx (GET_MODE (x));
2642 emit_move_insn (op0, x);
2645 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2646 if (GET_MODE (op0) != DImode)
2648 temp = gen_reg_rtx (DImode);
2649 convert_move (temp, op0, 0);
2653 emit_insn (gen_rtx_SET (VOIDmode, dest,
2654 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2655 gen_rtx_fmt_ee (compare_code, DImode,
2663 x = gen_compare_reg_1 (compare_code, x, y);
2666 gcc_assert (GET_MODE (x) != CC_NOOVmode
2667 && GET_MODE (x) != CCX_NOOVmode);
2669 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2670 emit_insn (gen_rtx_SET (VOIDmode, dest,
2671 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2672 gen_rtx_fmt_ee (compare_code,
2673 GET_MODE (x), x, y),
2674 const1_rtx, dest)));
2680 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2681 without jumps using the addx/subx instructions. */
2684 emit_scc_insn (rtx operands[])
2691 /* The quad-word fp compare library routines all return nonzero to indicate
2692 true, which is different from the equivalent libgcc routines, so we must
2693 handle them specially here. */
2694 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2696 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2697 GET_CODE (operands[1]));
2698 operands[2] = XEXP (operands[1], 0);
2699 operands[3] = XEXP (operands[1], 1);
2702 code = GET_CODE (operands[1]);
2706 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2707 more applications). The exception to this is "reg != 0" which can
2708 be done in one instruction on v9 (so we do it). */
2711 if (GET_MODE (x) == SImode)
2715 pat = gen_seqsidi_special (operands[0], x, y);
2717 pat = gen_seqsisi_special (operands[0], x, y);
2721 else if (GET_MODE (x) == DImode)
2723 rtx pat = gen_seqdi_special (operands[0], x, y);
2731 if (GET_MODE (x) == SImode)
2735 pat = gen_snesidi_special (operands[0], x, y);
2737 pat = gen_snesisi_special (operands[0], x, y);
2741 else if (GET_MODE (x) == DImode)
2745 pat = gen_snedi_special_vis3 (operands[0], x, y);
2747 pat = gen_snedi_special (operands[0], x, y);
2755 && GET_MODE (x) == DImode
2757 && (code == GTU || code == LTU))
2758 && gen_v9_scc (operands[0], code, x, y))
2761 /* We can do LTU and GEU using the addx/subx instructions too. And
2762 for GTU/LEU, if both operands are registers swap them and fall
2763 back to the easy case. */
2764 if (code == GTU || code == LEU)
2766 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
2767 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
2772 code = swap_condition (code);
2777 || (!TARGET_VIS3 && code == GEU))
2779 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2780 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
2781 gen_compare_reg_1 (code, x, y),
2786 /* All the posibilities to use addx/subx based sequences has been
2787 exhausted, try for a 3 instruction sequence using v9 conditional
2789 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
2792 /* Nope, do branches. */
2796 /* Emit a conditional jump insn for the v9 architecture using comparison code
2797 CODE and jump target LABEL.
2798 This function exists to take advantage of the v9 brxx insns. */
2801 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2803 emit_jump_insn (gen_rtx_SET (VOIDmode,
2805 gen_rtx_IF_THEN_ELSE (VOIDmode,
2806 gen_rtx_fmt_ee (code, GET_MODE (op0),
2808 gen_rtx_LABEL_REF (VOIDmode, label),
2812 /* Emit a conditional jump insn for the UA2011 architecture using
2813 comparison code CODE and jump target LABEL. This function exists
2814 to take advantage of the UA2011 Compare and Branch insns. */
2817 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
2821 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
2822 gen_rtx_fmt_ee(code, GET_MODE(op0),
2824 gen_rtx_LABEL_REF (VOIDmode, label),
2827 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
2831 emit_conditional_branch_insn (rtx operands[])
2833 /* The quad-word fp compare library routines all return nonzero to indicate
2834 true, which is different from the equivalent libgcc routines, so we must
2835 handle them specially here. */
2836 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
2838 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
2839 GET_CODE (operands[0]));
2840 operands[1] = XEXP (operands[0], 0);
2841 operands[2] = XEXP (operands[0], 1);
2844 /* If we can tell early on that the comparison is against a constant
2845 that won't fit in the 5-bit signed immediate field of a cbcond,
2846 use one of the other v9 conditional branch sequences. */
2848 && GET_CODE (operands[1]) == REG
2849 && (GET_MODE (operands[1]) == SImode
2850 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
2851 && (GET_CODE (operands[2]) != CONST_INT
2852 || SPARC_SIMM5_P (INTVAL (operands[2]))))
2854 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
2858 if (TARGET_ARCH64 && operands[2] == const0_rtx
2859 && GET_CODE (operands[1]) == REG
2860 && GET_MODE (operands[1]) == DImode)
2862 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
2866 operands[1] = gen_compare_reg (operands[0]);
2867 operands[2] = const0_rtx;
2868 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
2869 operands[1], operands[2]);
2870 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
2875 /* Generate a DFmode part of a hard TFmode register.
2876 REG is the TFmode hard register, LOW is 1 for the
2877 low 64bit of the register and 0 otherwise.
2880 gen_df_reg (rtx reg, int low)
2882 int regno = REGNO (reg);
2884 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
2885 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
2886 return gen_rtx_REG (DFmode, regno);
2889 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
2890 Unlike normal calls, TFmode operands are passed by reference. It is
2891 assumed that no more than 3 operands are required. */
2894 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
2896 rtx ret_slot = NULL, arg[3], func_sym;
2899 /* We only expect to be called for conversions, unary, and binary ops. */
2900 gcc_assert (nargs == 2 || nargs == 3);
2902 for (i = 0; i < nargs; ++i)
2904 rtx this_arg = operands[i];
2907 /* TFmode arguments and return values are passed by reference. */
2908 if (GET_MODE (this_arg) == TFmode)
2910 int force_stack_temp;
2912 force_stack_temp = 0;
2913 if (TARGET_BUGGY_QP_LIB && i == 0)
2914 force_stack_temp = 1;
2916 if (GET_CODE (this_arg) == MEM
2917 && ! force_stack_temp)
2919 tree expr = MEM_EXPR (this_arg);
2921 mark_addressable (expr);
2922 this_arg = XEXP (this_arg, 0);
2924 else if (CONSTANT_P (this_arg)
2925 && ! force_stack_temp)
2927 this_slot = force_const_mem (TFmode, this_arg);
2928 this_arg = XEXP (this_slot, 0);
2932 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
2934 /* Operand 0 is the return value. We'll copy it out later. */
2936 emit_move_insn (this_slot, this_arg);
2938 ret_slot = this_slot;
2940 this_arg = XEXP (this_slot, 0);
2947 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
2949 if (GET_MODE (operands[0]) == TFmode)
2952 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
2953 arg[0], GET_MODE (arg[0]),
2954 arg[1], GET_MODE (arg[1]));
2956 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
2957 arg[0], GET_MODE (arg[0]),
2958 arg[1], GET_MODE (arg[1]),
2959 arg[2], GET_MODE (arg[2]));
2962 emit_move_insn (operands[0], ret_slot);
2968 gcc_assert (nargs == 2);
2970 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
2971 GET_MODE (operands[0]), 1,
2972 arg[1], GET_MODE (arg[1]));
2974 if (ret != operands[0])
2975 emit_move_insn (operands[0], ret);
2979 /* Expand soft-float TFmode calls to sparc abi routines. */
2982 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3004 emit_soft_tfmode_libcall (func, 3, operands);
3008 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3012 gcc_assert (code == SQRT);
3015 emit_soft_tfmode_libcall (func, 2, operands);
3019 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3026 switch (GET_MODE (operands[1]))
3039 case FLOAT_TRUNCATE:
3040 switch (GET_MODE (operands[0]))
3054 switch (GET_MODE (operands[1]))
3059 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3069 case UNSIGNED_FLOAT:
3070 switch (GET_MODE (operands[1]))
3075 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3086 switch (GET_MODE (operands[0]))
3100 switch (GET_MODE (operands[0]))
3117 emit_soft_tfmode_libcall (func, 2, operands);
3120 /* Expand a hard-float tfmode operation. All arguments must be in
3124 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3128 if (GET_RTX_CLASS (code) == RTX_UNARY)
3130 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3131 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3135 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3136 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3137 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3138 operands[1], operands[2]);
3141 if (register_operand (operands[0], VOIDmode))
3144 dest = gen_reg_rtx (GET_MODE (operands[0]));
3146 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3148 if (dest != operands[0])
3149 emit_move_insn (operands[0], dest);
3153 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3155 if (TARGET_HARD_QUAD)
3156 emit_hard_tfmode_operation (code, operands);
3158 emit_soft_tfmode_binop (code, operands);
3162 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3164 if (TARGET_HARD_QUAD)
3165 emit_hard_tfmode_operation (code, operands);
3167 emit_soft_tfmode_unop (code, operands);
3171 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3173 if (TARGET_HARD_QUAD)
3174 emit_hard_tfmode_operation (code, operands);
3176 emit_soft_tfmode_cvt (code, operands);
3179 /* Return nonzero if a branch/jump/call instruction will be emitting
3180 nop into its delay slot. */
3183 empty_delay_slot (rtx insn)
3187 /* If no previous instruction (should not happen), return true. */
3188 if (PREV_INSN (insn) == NULL)
3191 seq = NEXT_INSN (PREV_INSN (insn));
3192 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3198 /* Return nonzero if we should emit a nop after a cbcond instruction.
3199 The cbcond instruction does not have a delay slot, however there is
3200 a severe performance penalty if a control transfer appears right
3201 after a cbcond. Therefore we emit a nop when we detect this
3205 emit_cbcond_nop (rtx insn)
3207 rtx next = next_active_insn (insn);
3212 if (NONJUMP_INSN_P (next)
3213 && GET_CODE (PATTERN (next)) == SEQUENCE)
3214 next = XVECEXP (PATTERN (next), 0, 0);
3215 else if (CALL_P (next)
3216 && GET_CODE (PATTERN (next)) == PARALLEL)
3218 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3220 if (GET_CODE (delay) == RETURN)
3222 /* It's a sibling call. Do not emit the nop if we're going
3223 to emit something other than the jump itself as the first
3224 instruction of the sibcall sequence. */
3225 if (sparc_leaf_function_p || TARGET_FLAT)
3230 if (NONJUMP_INSN_P (next))
3236 /* Return nonzero if TRIAL can go into the call delay slot. */
3239 tls_call_delay (rtx trial)
3244 call __tls_get_addr, %tgd_call (foo)
3245 add %l7, %o0, %o0, %tgd_add (foo)
3246 while Sun as/ld does not. */
3247 if (TARGET_GNU_TLS || !TARGET_TLS)
3250 pat = PATTERN (trial);
3252 /* We must reject tgd_add{32|64}, i.e.
3253 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3254 and tldm_add{32|64}, i.e.
3255 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3257 if (GET_CODE (pat) == SET
3258 && GET_CODE (SET_SRC (pat)) == PLUS)
3260 rtx unspec = XEXP (SET_SRC (pat), 1);
3262 if (GET_CODE (unspec) == UNSPEC
3263 && (XINT (unspec, 1) == UNSPEC_TLSGD
3264 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3271 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3272 instruction. RETURN_P is true if the v9 variant 'return' is to be
3273 considered in the test too.
3275 TRIAL must be a SET whose destination is a REG appropriate for the
3276 'restore' instruction or, if RETURN_P is true, for the 'return'
3280 eligible_for_restore_insn (rtx trial, bool return_p)
3282 rtx pat = PATTERN (trial);
3283 rtx src = SET_SRC (pat);
3284 bool src_is_freg = false;
3287 /* Since we now can do moves between float and integer registers when
3288 VIS3 is enabled, we have to catch this case. We can allow such
3289 moves when doing a 'return' however. */
3291 if (GET_CODE (src_reg) == SUBREG)
3292 src_reg = SUBREG_REG (src_reg);
3293 if (GET_CODE (src_reg) == REG
3294 && SPARC_FP_REG_P (REGNO (src_reg)))
3297 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3298 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3299 && arith_operand (src, GET_MODE (src))
3303 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3305 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3308 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3309 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3310 && arith_double_operand (src, GET_MODE (src))
3312 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3314 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3315 else if (! TARGET_FPU && register_operand (src, SFmode))
3318 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3319 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3322 /* If we have the 'return' instruction, anything that does not use
3323 local or output registers and can go into a delay slot wins. */
3326 && !epilogue_renumber (&pat, 1)
3327 && get_attr_in_uncond_branch_delay (trial)
3328 == IN_UNCOND_BRANCH_DELAY_TRUE)
3331 /* The 'restore src1,src2,dest' pattern for SImode. */
3332 else if (GET_CODE (src) == PLUS
3333 && register_operand (XEXP (src, 0), SImode)
3334 && arith_operand (XEXP (src, 1), SImode))
3337 /* The 'restore src1,src2,dest' pattern for DImode. */
3338 else if (GET_CODE (src) == PLUS
3339 && register_operand (XEXP (src, 0), DImode)
3340 && arith_double_operand (XEXP (src, 1), DImode))
3343 /* The 'restore src1,%lo(src2),dest' pattern. */
3344 else if (GET_CODE (src) == LO_SUM
3345 && ! TARGET_CM_MEDMID
3346 && ((register_operand (XEXP (src, 0), SImode)
3347 && immediate_operand (XEXP (src, 1), SImode))
3349 && register_operand (XEXP (src, 0), DImode)
3350 && immediate_operand (XEXP (src, 1), DImode))))
3353 /* The 'restore src,src,dest' pattern. */
3354 else if (GET_CODE (src) == ASHIFT
3355 && (register_operand (XEXP (src, 0), SImode)
3356 || register_operand (XEXP (src, 0), DImode))
3357 && XEXP (src, 1) == const1_rtx)
3363 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3366 eligible_for_return_delay (rtx trial)
3371 if (! NONJUMP_INSN_P (trial))
3374 if (get_attr_length (trial) != 1)
3377 /* If the function uses __builtin_eh_return, the eh_return machinery
3378 occupies the delay slot. */
3379 if (crtl->calls_eh_return)
3382 /* In the case of a leaf or flat function, anything can go into the slot. */
3383 if (sparc_leaf_function_p || TARGET_FLAT)
3385 get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
3387 pat = PATTERN (trial);
3388 if (GET_CODE (pat) == PARALLEL)
3394 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3396 rtx expr = XVECEXP (pat, 0, i);
3397 if (GET_CODE (expr) != SET)
3399 if (GET_CODE (SET_DEST (expr)) != REG)
3401 regno = REGNO (SET_DEST (expr));
3402 if (regno >= 8 && regno < 24)
3405 return !epilogue_renumber (&pat, 1)
3406 && (get_attr_in_uncond_branch_delay (trial)
3407 == IN_UNCOND_BRANCH_DELAY_TRUE);
3410 if (GET_CODE (pat) != SET)
3413 if (GET_CODE (SET_DEST (pat)) != REG)
3416 regno = REGNO (SET_DEST (pat));
3418 /* Otherwise, only operations which can be done in tandem with
3419 a `restore' or `return' insn can go into the delay slot. */
3420 if (regno >= 8 && regno < 24)
3423 /* If this instruction sets up floating point register and we have a return
3424 instruction, it can probably go in. But restore will not work
3426 if (! SPARC_INT_REG_P (regno))
3428 && !epilogue_renumber (&pat, 1)
3429 && get_attr_in_uncond_branch_delay (trial)
3430 == IN_UNCOND_BRANCH_DELAY_TRUE);
3432 return eligible_for_restore_insn (trial, true);
3435 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3438 eligible_for_sibcall_delay (rtx trial)
3442 if (! NONJUMP_INSN_P (trial) || GET_CODE (PATTERN (trial)) != SET)
3445 if (get_attr_length (trial) != 1)
3448 pat = PATTERN (trial);
3450 if (sparc_leaf_function_p || TARGET_FLAT)
3452 /* If the tail call is done using the call instruction,
3453 we have to restore %o7 in the delay slot. */
3454 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3457 /* %g1 is used to build the function address */
3458 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3464 /* Otherwise, only operations which can be done in tandem with
3465 a `restore' insn can go into the delay slot. */
3466 if (GET_CODE (SET_DEST (pat)) != REG
3467 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3468 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3471 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3473 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3476 return eligible_for_restore_insn (trial, false);
3479 /* Determine if it's legal to put X into the constant pool. This
3480 is not possible if X contains the address of a symbol that is
3481 not constant (TLS) or not known at final link time (PIC). */
3484 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3486 switch (GET_CODE (x))
3491 /* Accept all non-symbolic constants. */
3495 /* Labels are OK iff we are non-PIC. */
3496 return flag_pic != 0;
3499 /* 'Naked' TLS symbol references are never OK,
3500 non-TLS symbols are OK iff we are non-PIC. */
3501 if (SYMBOL_REF_TLS_MODEL (x))
3504 return flag_pic != 0;
3507 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3510 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3511 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3519 /* Global Offset Table support. */
3520 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3521 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3523 /* Return the SYMBOL_REF for the Global Offset Table. */
3525 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3530 if (!sparc_got_symbol)
3531 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3533 return sparc_got_symbol;
3536 /* Ensure that we are not using patterns that are not OK with PIC. */
3546 op = recog_data.operand[i];
3547 gcc_assert (GET_CODE (op) != SYMBOL_REF
3548 && (GET_CODE (op) != CONST
3549 || (GET_CODE (XEXP (op, 0)) == MINUS
3550 && XEXP (XEXP (op, 0), 0) == sparc_got ()
3551 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3558 /* Return true if X is an address which needs a temporary register when
3559 reloaded while generating PIC code. */
3562 pic_address_needs_scratch (rtx x)
3564 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3565 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3566 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3567 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3568 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3574 /* Determine if a given RTX is a valid constant. We already know this
3575 satisfies CONSTANT_P. */
3578 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3580 switch (GET_CODE (x))
3584 if (sparc_tls_referenced_p (x))
3589 if (GET_MODE (x) == VOIDmode)
3592 /* Floating point constants are generally not ok.
3593 The only exception is 0.0 and all-ones in VIS. */
3595 && SCALAR_FLOAT_MODE_P (mode)
3596 && (const_zero_operand (x, mode)
3597 || const_all_ones_operand (x, mode)))
3603 /* Vector constants are generally not ok.
3604 The only exception is 0 or -1 in VIS. */
3606 && (const_zero_operand (x, mode)
3607 || const_all_ones_operand (x, mode)))
3619 /* Determine if a given RTX is a valid constant address. */
3622 constant_address_p (rtx x)
3624 switch (GET_CODE (x))
3632 if (flag_pic && pic_address_needs_scratch (x))
3634 return sparc_legitimate_constant_p (Pmode, x);
3637 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3644 /* Nonzero if the constant value X is a legitimate general operand
3645 when generating PIC code. It is given that flag_pic is on and
3646 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3649 legitimate_pic_operand_p (rtx x)
3651 if (pic_address_needs_scratch (x))
3653 if (sparc_tls_referenced_p (x))
3658 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3660 && INTVAL (X) >= -0x1000 \
3661 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3663 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3665 && INTVAL (X) >= -0x1000 \
3666 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
3668 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3670 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3671 ordinarily. This changes a bit when generating PIC. */
3674 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3676 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3678 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3680 else if (GET_CODE (addr) == PLUS)
3682 rs1 = XEXP (addr, 0);
3683 rs2 = XEXP (addr, 1);
3685 /* Canonicalize. REG comes first, if there are no regs,
3686 LO_SUM comes first. */
3688 && GET_CODE (rs1) != SUBREG
3690 || GET_CODE (rs2) == SUBREG
3691 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3693 rs1 = XEXP (addr, 1);
3694 rs2 = XEXP (addr, 0);
3698 && rs1 == pic_offset_table_rtx
3700 && GET_CODE (rs2) != SUBREG
3701 && GET_CODE (rs2) != LO_SUM
3702 && GET_CODE (rs2) != MEM
3703 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3704 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3705 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3707 || GET_CODE (rs1) == SUBREG)
3708 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3713 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3714 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3716 /* We prohibit REG + REG for TFmode when there are no quad move insns
3717 and we consequently need to split. We do this because REG+REG
3718 is not an offsettable address. If we get the situation in reload
3719 where source and destination of a movtf pattern are both MEMs with
3720 REG+REG address, then only one of them gets converted to an
3721 offsettable address. */
3723 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3726 /* Likewise for TImode, but in all cases. */
3730 /* We prohibit REG + REG on ARCH32 if not optimizing for
3731 DFmode/DImode because then mem_min_alignment is likely to be zero
3732 after reload and the forced split would lack a matching splitter
3734 if (TARGET_ARCH32 && !optimize
3735 && (mode == DFmode || mode == DImode))
3738 else if (USE_AS_OFFSETABLE_LO10
3739 && GET_CODE (rs1) == LO_SUM
3741 && ! TARGET_CM_MEDMID
3742 && RTX_OK_FOR_OLO10_P (rs2, mode))
3745 imm1 = XEXP (rs1, 1);
3746 rs1 = XEXP (rs1, 0);
3747 if (!CONSTANT_P (imm1)
3748 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3752 else if (GET_CODE (addr) == LO_SUM)
3754 rs1 = XEXP (addr, 0);
3755 imm1 = XEXP (addr, 1);
3757 if (!CONSTANT_P (imm1)
3758 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
3761 /* We can't allow TFmode in 32-bit mode, because an offset greater
3762 than the alignment (8) may cause the LO_SUM to overflow. */
3763 if (mode == TFmode && TARGET_ARCH32)
3766 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
3771 if (GET_CODE (rs1) == SUBREG)
3772 rs1 = SUBREG_REG (rs1);
3778 if (GET_CODE (rs2) == SUBREG)
3779 rs2 = SUBREG_REG (rs2);
3786 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
3787 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
3792 if ((! SPARC_INT_REG_P (REGNO (rs1))
3793 && REGNO (rs1) != FRAME_POINTER_REGNUM
3794 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
3796 && (! SPARC_INT_REG_P (REGNO (rs2))
3797 && REGNO (rs2) != FRAME_POINTER_REGNUM
3798 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
3804 /* Return the SYMBOL_REF for the tls_get_addr function. */
3806 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3809 sparc_tls_get_addr (void)
3811 if (!sparc_tls_symbol)
3812 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
3814 return sparc_tls_symbol;
3817 /* Return the Global Offset Table to be used in TLS mode. */
3820 sparc_tls_got (void)
3822 /* In PIC mode, this is just the PIC offset table. */
3825 crtl->uses_pic_offset_table = 1;
3826 return pic_offset_table_rtx;
3829 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
3830 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
3831 if (TARGET_SUN_TLS && TARGET_ARCH32)
3833 load_got_register ();
3834 return global_offset_table_rtx;
3837 /* In all other cases, we load a new pseudo with the GOT symbol. */
3838 return copy_to_reg (sparc_got ());
3841 /* Return true if X contains a thread-local symbol. */
3844 sparc_tls_referenced_p (rtx x)
3846 if (!TARGET_HAVE_TLS)
3849 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3850 x = XEXP (XEXP (x, 0), 0);
3852 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
3855 /* That's all we handle in sparc_legitimize_tls_address for now. */
3859 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3860 this (thread-local) address. */
3863 sparc_legitimize_tls_address (rtx addr)
3865 rtx temp1, temp2, temp3, ret, o0, got, insn;
3867 gcc_assert (can_create_pseudo_p ());
3869 if (GET_CODE (addr) == SYMBOL_REF)
3870 switch (SYMBOL_REF_TLS_MODEL (addr))
3872 case TLS_MODEL_GLOBAL_DYNAMIC:
3874 temp1 = gen_reg_rtx (SImode);
3875 temp2 = gen_reg_rtx (SImode);
3876 ret = gen_reg_rtx (Pmode);
3877 o0 = gen_rtx_REG (Pmode, 8);
3878 got = sparc_tls_got ();
3879 emit_insn (gen_tgd_hi22 (temp1, addr));
3880 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
3883 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
3884 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
3889 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
3890 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
3893 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3894 insn = get_insns ();
3896 emit_libcall_block (insn, ret, o0, addr);
3899 case TLS_MODEL_LOCAL_DYNAMIC:
3901 temp1 = gen_reg_rtx (SImode);
3902 temp2 = gen_reg_rtx (SImode);
3903 temp3 = gen_reg_rtx (Pmode);
3904 ret = gen_reg_rtx (Pmode);
3905 o0 = gen_rtx_REG (Pmode, 8);
3906 got = sparc_tls_got ();
3907 emit_insn (gen_tldm_hi22 (temp1));
3908 emit_insn (gen_tldm_lo10 (temp2, temp1));
3911 emit_insn (gen_tldm_add32 (o0, got, temp2));
3912 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
3917 emit_insn (gen_tldm_add64 (o0, got, temp2));
3918 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
3921 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
3922 insn = get_insns ();
3924 emit_libcall_block (insn, temp3, o0,
3925 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
3926 UNSPEC_TLSLD_BASE));
3927 temp1 = gen_reg_rtx (SImode);
3928 temp2 = gen_reg_rtx (SImode);
3929 emit_insn (gen_tldo_hix22 (temp1, addr));
3930 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
3932 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
3934 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
3937 case TLS_MODEL_INITIAL_EXEC:
3938 temp1 = gen_reg_rtx (SImode);
3939 temp2 = gen_reg_rtx (SImode);
3940 temp3 = gen_reg_rtx (Pmode);
3941 got = sparc_tls_got ();
3942 emit_insn (gen_tie_hi22 (temp1, addr));
3943 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
3945 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
3947 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
3950 ret = gen_reg_rtx (Pmode);
3952 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
3955 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
3959 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
3962 case TLS_MODEL_LOCAL_EXEC:
3963 temp1 = gen_reg_rtx (Pmode);
3964 temp2 = gen_reg_rtx (Pmode);
3967 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
3968 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
3972 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
3973 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
3975 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
3982 else if (GET_CODE (addr) == CONST)
3986 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
3988 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
3989 offset = XEXP (XEXP (addr, 0), 1);
3991 base = force_operand (base, NULL_RTX);
3992 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
3993 offset = force_reg (Pmode, offset);
3994 ret = gen_rtx_PLUS (Pmode, base, offset);
3998 gcc_unreachable (); /* for now ... */
4003 /* Legitimize PIC addresses. If the address is already position-independent,
4004 we return ORIG. Newly generated position-independent addresses go into a
4005 reg. This is REG if nonzero, otherwise we allocate register(s) as
4009 sparc_legitimize_pic_address (rtx orig, rtx reg)
4011 bool gotdata_op = false;
4013 if (GET_CODE (orig) == SYMBOL_REF
4014 /* See the comment in sparc_expand_move. */
4015 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4017 rtx pic_ref, address;
4022 gcc_assert (can_create_pseudo_p ());
4023 reg = gen_reg_rtx (Pmode);
4028 /* If not during reload, allocate another temp reg here for loading
4029 in the address, so that these instructions can be optimized
4031 rtx temp_reg = (! can_create_pseudo_p ()
4032 ? reg : gen_reg_rtx (Pmode));
4034 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4035 won't get confused into thinking that these two instructions
4036 are loading in the true address of the symbol. If in the
4037 future a PIC rtx exists, that should be used instead. */
4040 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4041 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4045 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4046 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4054 crtl->uses_pic_offset_table = 1;
4058 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4059 pic_offset_table_rtx,
4062 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4063 pic_offset_table_rtx,
4069 = gen_const_mem (Pmode,
4070 gen_rtx_PLUS (Pmode,
4071 pic_offset_table_rtx, address));
4072 insn = emit_move_insn (reg, pic_ref);
4075 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4077 set_unique_reg_note (insn, REG_EQUAL, orig);
4080 else if (GET_CODE (orig) == CONST)
4084 if (GET_CODE (XEXP (orig, 0)) == PLUS
4085 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4090 gcc_assert (can_create_pseudo_p ());
4091 reg = gen_reg_rtx (Pmode);
4094 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4095 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4096 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4097 base == reg ? NULL_RTX : reg);
4099 if (GET_CODE (offset) == CONST_INT)
4101 if (SMALL_INT (offset))
4102 return plus_constant (Pmode, base, INTVAL (offset));
4103 else if (can_create_pseudo_p ())
4104 offset = force_reg (Pmode, offset);
4106 /* If we reach here, then something is seriously wrong. */
4109 return gen_rtx_PLUS (Pmode, base, offset);
4111 else if (GET_CODE (orig) == LABEL_REF)
4112 /* ??? We ought to be checking that the register is live instead, in case
4113 it is eliminated. */
4114 crtl->uses_pic_offset_table = 1;
4119 /* Try machine-dependent ways of modifying an illegitimate address X
4120 to be legitimate. If we find one, return the new, valid address.
4122 OLDX is the address as it was before break_out_memory_refs was called.
4123 In some cases it is useful to look at this to decide what needs to be done.
4125 MODE is the mode of the operand pointed to by X.
4127 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4130 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4131 enum machine_mode mode)
4135 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4136 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4137 force_operand (XEXP (x, 0), NULL_RTX));
4138 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4139 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4140 force_operand (XEXP (x, 1), NULL_RTX));
4141 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4142 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4144 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4145 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4146 force_operand (XEXP (x, 1), NULL_RTX));
4148 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4151 if (sparc_tls_referenced_p (x))
4152 x = sparc_legitimize_tls_address (x);
4154 x = sparc_legitimize_pic_address (x, NULL_RTX);
4155 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4156 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4157 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4158 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4159 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4160 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4161 else if (GET_CODE (x) == SYMBOL_REF
4162 || GET_CODE (x) == CONST
4163 || GET_CODE (x) == LABEL_REF)
4164 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4169 /* Delegitimize an address that was legitimized by the above function. */
4172 sparc_delegitimize_address (rtx x)
4174 x = delegitimize_mem_from_attrs (x);
4176 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4177 switch (XINT (XEXP (x, 1), 1))
4179 case UNSPEC_MOVE_PIC:
4181 x = XVECEXP (XEXP (x, 1), 0, 0);
4182 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4188 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4189 if (GET_CODE (x) == MINUS
4190 && REG_P (XEXP (x, 0))
4191 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4192 && GET_CODE (XEXP (x, 1)) == LO_SUM
4193 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4194 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4196 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4197 gcc_assert (GET_CODE (x) == LABEL_REF);
4203 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4204 replace the input X, or the original X if no replacement is called for.
4205 The output parameter *WIN is 1 if the calling macro should goto WIN,
4208 For SPARC, we wish to handle addresses by splitting them into
4209 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4210 This cuts the number of extra insns by one.
4212 Do nothing when generating PIC code and the address is a symbolic
4213 operand or requires a scratch register. */
4216 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4217 int opnum, int type,
4218 int ind_levels ATTRIBUTE_UNUSED, int *win)
4220 /* Decompose SImode constants into HIGH+LO_SUM. */
4222 && (mode != TFmode || TARGET_ARCH64)
4223 && GET_MODE (x) == SImode
4224 && GET_CODE (x) != LO_SUM
4225 && GET_CODE (x) != HIGH
4226 && sparc_cmodel <= CM_MEDLOW
4228 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4230 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4231 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4232 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4233 opnum, (enum reload_type)type);
4238 /* We have to recognize what we have already generated above. */
4239 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4241 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4242 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4243 opnum, (enum reload_type)type);
4252 /* Return true if ADDR (a legitimate address expression)
4253 has an effect that depends on the machine mode it is used for.
4259 is not equivalent to
4261 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4263 because [%l7+a+1] is interpreted as the address of (a+1). */
4267 sparc_mode_dependent_address_p (const_rtx addr,
4268 addr_space_t as ATTRIBUTE_UNUSED)
4270 if (flag_pic && GET_CODE (addr) == PLUS)
4272 rtx op0 = XEXP (addr, 0);
4273 rtx op1 = XEXP (addr, 1);
4274 if (op0 == pic_offset_table_rtx
4275 && symbolic_operand (op1, VOIDmode))
4282 #ifdef HAVE_GAS_HIDDEN
4283 # define USE_HIDDEN_LINKONCE 1
4285 # define USE_HIDDEN_LINKONCE 0
4289 get_pc_thunk_name (char name[32], unsigned int regno)
4291 const char *reg_name = reg_names[regno];
4293 /* Skip the leading '%' as that cannot be used in a
4297 if (USE_HIDDEN_LINKONCE)
4298 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4300 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4303 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4306 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4308 int orig_flag_pic = flag_pic;
4311 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4314 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4316 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4317 flag_pic = orig_flag_pic;
4322 /* Emit code to load the GOT register. */
4325 load_got_register (void)
4327 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4328 if (!global_offset_table_rtx)
4329 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4331 if (TARGET_VXWORKS_RTP)
4332 emit_insn (gen_vxworks_load_got ());
4335 /* The GOT symbol is subject to a PC-relative relocation so we need a
4336 helper function to add the PC value and thus get the final value. */
4337 if (!got_helper_rtx)
4340 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4341 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4344 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4346 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4349 /* Need to emit this whether or not we obey regdecls,
4350 since setjmp/longjmp can cause life info to screw up.
4351 ??? In the case where we don't obey regdecls, this is not sufficient
4352 since we may not fall out the bottom. */
4353 emit_use (global_offset_table_rtx);
4356 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4357 address of the call target. */
4360 sparc_emit_call_insn (rtx pat, rtx addr)
4364 insn = emit_call_insn (pat);
4366 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4367 if (TARGET_VXWORKS_RTP
4369 && GET_CODE (addr) == SYMBOL_REF
4370 && (SYMBOL_REF_DECL (addr)
4371 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4372 : !SYMBOL_REF_LOCAL_P (addr)))
4374 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4375 crtl->uses_pic_offset_table = 1;
4379 /* Return 1 if RTX is a MEM which is known to be aligned to at
4380 least a DESIRED byte boundary. */
4383 mem_min_alignment (rtx mem, int desired)
4385 rtx addr, base, offset;
4387 /* If it's not a MEM we can't accept it. */
4388 if (GET_CODE (mem) != MEM)
4392 if (!TARGET_UNALIGNED_DOUBLES
4393 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4396 /* ??? The rest of the function predates MEM_ALIGN so
4397 there is probably a bit of redundancy. */
4398 addr = XEXP (mem, 0);
4399 base = offset = NULL_RTX;
4400 if (GET_CODE (addr) == PLUS)
4402 if (GET_CODE (XEXP (addr, 0)) == REG)
4404 base = XEXP (addr, 0);
4406 /* What we are saying here is that if the base
4407 REG is aligned properly, the compiler will make
4408 sure any REG based index upon it will be so
4410 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4411 offset = XEXP (addr, 1);
4413 offset = const0_rtx;
4416 else if (GET_CODE (addr) == REG)
4419 offset = const0_rtx;
4422 if (base != NULL_RTX)
4424 int regno = REGNO (base);
4426 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4428 /* Check if the compiler has recorded some information
4429 about the alignment of the base REG. If reload has
4430 completed, we already matched with proper alignments.
4431 If not running global_alloc, reload might give us
4432 unaligned pointer to local stack though. */
4434 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4435 || (optimize && reload_completed))
4436 && (INTVAL (offset) & (desired - 1)) == 0)
4441 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4445 else if (! TARGET_UNALIGNED_DOUBLES
4446 || CONSTANT_P (addr)
4447 || GET_CODE (addr) == LO_SUM)
4449 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4450 is true, in which case we can only assume that an access is aligned if
4451 it is to a constant address, or the address involves a LO_SUM. */
4455 /* An obviously unaligned address. */
4460 /* Vectors to keep interesting information about registers where it can easily
4461 be got. We used to use the actual mode value as the bit number, but there
4462 are more than 32 modes now. Instead we use two tables: one indexed by
4463 hard register number, and one indexed by mode. */
4465 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4466 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4467 mapped into one sparc_mode_class mode. */
4469 enum sparc_mode_class {
4470 S_MODE, D_MODE, T_MODE, O_MODE,
4471 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4475 /* Modes for single-word and smaller quantities. */
4476 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4478 /* Modes for double-word and smaller quantities. */
4479 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4481 /* Modes for quad-word and smaller quantities. */
4482 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4484 /* Modes for 8-word and smaller quantities. */
4485 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4487 /* Modes for single-float quantities. We must allow any single word or
4488 smaller quantity. This is because the fix/float conversion instructions
4489 take integer inputs/outputs from the float registers. */
4490 #define SF_MODES (S_MODES)
4492 /* Modes for double-float and smaller quantities. */
4493 #define DF_MODES (D_MODES)
4495 /* Modes for quad-float and smaller quantities. */
4496 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4498 /* Modes for quad-float pairs and smaller quantities. */
4499 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4501 /* Modes for double-float only quantities. */
4502 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4504 /* Modes for quad-float and double-float only quantities. */
4505 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4507 /* Modes for quad-float pairs and double-float only quantities. */
4508 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4510 /* Modes for condition codes. */
4511 #define CC_MODES (1 << (int) CC_MODE)
4512 #define CCFP_MODES (1 << (int) CCFP_MODE)
4514 /* Value is 1 if register/mode pair is acceptable on sparc.
4515 The funny mixture of D and T modes is because integer operations
4516 do not specially operate on tetra quantities, so non-quad-aligned
4517 registers can hold quadword quantities (except %o4 and %i4 because
4518 they cross fixed registers). */
4520 /* This points to either the 32 bit or the 64 bit version. */
4521 const int *hard_regno_mode_classes;
4523 static const int hard_32bit_mode_classes[] = {
4524 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4525 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4526 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4527 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4529 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4530 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4531 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4532 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4534 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4535 and none can hold SFmode/SImode values. */
4536 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4537 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4538 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4539 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4542 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4544 /* %icc, %sfp, %gsr */
4545 CC_MODES, 0, D_MODES
4548 static const int hard_64bit_mode_classes[] = {
4549 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4550 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4551 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4552 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4554 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4555 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4556 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4557 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4559 /* FP regs f32 to f63. Only the even numbered registers actually exist,
4560 and none can hold SFmode/SImode values. */
4561 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4562 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4563 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4564 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4567 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4569 /* %icc, %sfp, %gsr */
4570 CC_MODES, 0, D_MODES
4573 int sparc_mode_class [NUM_MACHINE_MODES];
4575 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4578 sparc_init_modes (void)
4582 for (i = 0; i < NUM_MACHINE_MODES; i++)
4584 switch (GET_MODE_CLASS (i))
4587 case MODE_PARTIAL_INT:
4588 case MODE_COMPLEX_INT:
4589 if (GET_MODE_SIZE (i) <= 4)
4590 sparc_mode_class[i] = 1 << (int) S_MODE;
4591 else if (GET_MODE_SIZE (i) == 8)
4592 sparc_mode_class[i] = 1 << (int) D_MODE;
4593 else if (GET_MODE_SIZE (i) == 16)
4594 sparc_mode_class[i] = 1 << (int) T_MODE;
4595 else if (GET_MODE_SIZE (i) == 32)
4596 sparc_mode_class[i] = 1 << (int) O_MODE;
4598 sparc_mode_class[i] = 0;
4600 case MODE_VECTOR_INT:
4601 if (GET_MODE_SIZE (i) <= 4)
4602 sparc_mode_class[i] = 1 << (int)SF_MODE;
4603 else if (GET_MODE_SIZE (i) == 8)
4604 sparc_mode_class[i] = 1 << (int)DF_MODE;
4607 case MODE_COMPLEX_FLOAT:
4608 if (GET_MODE_SIZE (i) <= 4)
4609 sparc_mode_class[i] = 1 << (int) SF_MODE;
4610 else if (GET_MODE_SIZE (i) == 8)
4611 sparc_mode_class[i] = 1 << (int) DF_MODE;
4612 else if (GET_MODE_SIZE (i) == 16)
4613 sparc_mode_class[i] = 1 << (int) TF_MODE;
4614 else if (GET_MODE_SIZE (i) == 32)
4615 sparc_mode_class[i] = 1 << (int) OF_MODE;
4617 sparc_mode_class[i] = 0;
4620 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4621 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4623 sparc_mode_class[i] = 1 << (int) CC_MODE;
4626 sparc_mode_class[i] = 0;
4632 hard_regno_mode_classes = hard_64bit_mode_classes;
4634 hard_regno_mode_classes = hard_32bit_mode_classes;
4636 /* Initialize the array used by REGNO_REG_CLASS. */
4637 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4639 if (i < 16 && TARGET_V8PLUS)
4640 sparc_regno_reg_class[i] = I64_REGS;
4641 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4642 sparc_regno_reg_class[i] = GENERAL_REGS;
4644 sparc_regno_reg_class[i] = FP_REGS;
4646 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4648 sparc_regno_reg_class[i] = FPCC_REGS;
4650 sparc_regno_reg_class[i] = NO_REGS;
4654 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4657 save_global_or_fp_reg_p (unsigned int regno,
4658 int leaf_function ATTRIBUTE_UNUSED)
4660 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4663 /* Return whether the return address register (%i7) is needed. */
4666 return_addr_reg_needed_p (int leaf_function)
4668 /* If it is live, for example because of __builtin_return_address (0). */
4669 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4672 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4674 /* Loading the GOT register clobbers %o7. */
4675 || crtl->uses_pic_offset_table
4676 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4682 /* Return whether REGNO, a local or in register, must be saved/restored. */
4685 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4687 /* General case: call-saved registers live at some point. */
4688 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4691 /* Frame pointer register (%fp) if needed. */
4692 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4695 /* Return address register (%i7) if needed. */
4696 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4699 /* GOT register (%l7) if needed. */
4700 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4703 /* If the function accesses prior frames, the frame pointer and the return
4704 address of the previous frame must be saved on the stack. */
4705 if (crtl->accesses_prior_frames
4706 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4712 /* Compute the frame size required by the function. This function is called
4713 during the reload pass and also by sparc_expand_prologue. */
4716 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4718 HOST_WIDE_INT frame_size, apparent_frame_size;
4719 int args_size, n_global_fp_regs = 0;
4720 bool save_local_in_regs_p = false;
4723 /* If the function allocates dynamic stack space, the dynamic offset is
4724 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4725 if (leaf_function && !cfun->calls_alloca)
4728 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4730 /* Calculate space needed for global registers. */
4732 for (i = 0; i < 8; i++)
4733 if (save_global_or_fp_reg_p (i, 0))
4734 n_global_fp_regs += 2;
4736 for (i = 0; i < 8; i += 2)
4737 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4738 n_global_fp_regs += 2;
4740 /* In the flat window model, find out which local and in registers need to
4741 be saved. We don't reserve space in the current frame for them as they
4742 will be spilled into the register window save area of the caller's frame.
4743 However, as soon as we use this register window save area, we must create
4744 that of the current frame to make it the live one. */
4746 for (i = 16; i < 32; i++)
4747 if (save_local_or_in_reg_p (i, leaf_function))
4749 save_local_in_regs_p = true;
4753 /* Calculate space needed for FP registers. */
4754 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4755 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4756 n_global_fp_regs += 2;
4759 && n_global_fp_regs == 0
4761 && !save_local_in_regs_p)
4762 frame_size = apparent_frame_size = 0;
4765 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4766 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4767 apparent_frame_size += n_global_fp_regs * 4;
4769 /* We need to add the size of the outgoing argument area. */
4770 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4772 /* And that of the register window save area. */
4773 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4775 /* Finally, bump to the appropriate alignment. */
4776 frame_size = SPARC_STACK_ALIGN (frame_size);
4779 /* Set up values for use in prologue and epilogue. */
4780 sparc_frame_size = frame_size;
4781 sparc_apparent_frame_size = apparent_frame_size;
4782 sparc_n_global_fp_regs = n_global_fp_regs;
4783 sparc_save_local_in_regs_p = save_local_in_regs_p;
4788 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4791 sparc_initial_elimination_offset (int to)
4795 if (to == STACK_POINTER_REGNUM)
4796 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4800 offset += SPARC_STACK_BIAS;
4804 /* Output any necessary .register pseudo-ops. */
4807 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
4809 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
4815 /* Check if %g[2367] were used without
4816 .register being printed for them already. */
4817 for (i = 2; i < 8; i++)
4819 if (df_regs_ever_live_p (i)
4820 && ! sparc_hard_reg_printed [i])
4822 sparc_hard_reg_printed [i] = 1;
4823 /* %g7 is used as TLS base register, use #ignore
4824 for it instead of #scratch. */
4825 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
4826 i == 7 ? "ignore" : "scratch");
4833 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
4835 #if PROBE_INTERVAL > 4096
4836 #error Cannot use indexed addressing mode for stack probing
4839 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
4840 inclusive. These are offsets from the current stack pointer.
4842 Note that we don't use the REG+REG addressing mode for the probes because
4843 of the stack bias in 64-bit mode. And it doesn't really buy us anything
4844 so the advantages of having a single code win here. */
4847 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
4849 rtx g1 = gen_rtx_REG (Pmode, 1);
4851 /* See if we have a constant small number of probes to generate. If so,
4852 that's the easy case. */
4853 if (size <= PROBE_INTERVAL)
4855 emit_move_insn (g1, GEN_INT (first));
4856 emit_insn (gen_rtx_SET (VOIDmode, g1,
4857 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4858 emit_stack_probe (plus_constant (Pmode, g1, -size));
4861 /* The run-time loop is made up of 10 insns in the generic case while the
4862 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
4863 else if (size <= 5 * PROBE_INTERVAL)
4867 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
4868 emit_insn (gen_rtx_SET (VOIDmode, g1,
4869 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4870 emit_stack_probe (g1);
4872 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
4873 it exceeds SIZE. If only two probes are needed, this will not
4874 generate any code. Then probe at FIRST + SIZE. */
4875 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
4877 emit_insn (gen_rtx_SET (VOIDmode, g1,
4878 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
4879 emit_stack_probe (g1);
4882 emit_stack_probe (plus_constant (Pmode, g1,
4883 (i - PROBE_INTERVAL) - size));
4886 /* Otherwise, do the same as above, but in a loop. Note that we must be
4887 extra careful with variables wrapping around because we might be at
4888 the very top (or the very bottom) of the address space and we have
4889 to be able to handle this case properly; in particular, we use an
4890 equality test for the loop condition. */
4893 HOST_WIDE_INT rounded_size;
4894 rtx g4 = gen_rtx_REG (Pmode, 4);
4896 emit_move_insn (g1, GEN_INT (first));
4899 /* Step 1: round SIZE to the previous multiple of the interval. */
4901 rounded_size = size & -PROBE_INTERVAL;
4902 emit_move_insn (g4, GEN_INT (rounded_size));
4905 /* Step 2: compute initial and final value of the loop counter. */
4907 /* TEST_ADDR = SP + FIRST. */
4908 emit_insn (gen_rtx_SET (VOIDmode, g1,
4909 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
4911 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
4912 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
4917 while (TEST_ADDR != LAST_ADDR)
4919 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
4923 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
4924 until it is equal to ROUNDED_SIZE. */
4927 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
4929 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
4932 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
4933 that SIZE is equal to ROUNDED_SIZE. */
4935 if (size != rounded_size)
4936 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
4939 /* Make sure nothing is scheduled before we are done. */
4940 emit_insn (gen_blockage ());
4943 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
4944 absolute addresses. */
4947 output_probe_stack_range (rtx reg1, rtx reg2)
4949 static int labelno = 0;
4950 char loop_lab[32], end_lab[32];
4953 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
4954 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
4956 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
4958 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
4961 output_asm_insn ("cmp\t%0, %1", xops);
4963 fputs ("\tbe,pn\t%xcc,", asm_out_file);
4965 fputs ("\tbe\t", asm_out_file);
4966 assemble_name_raw (asm_out_file, end_lab);
4967 fputc ('\n', asm_out_file);
4969 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
4970 xops[1] = GEN_INT (-PROBE_INTERVAL);
4971 output_asm_insn (" add\t%0, %1, %0", xops);
4973 /* Probe at TEST_ADDR and branch. */
4975 fputs ("\tba,pt\t%xcc,", asm_out_file);
4977 fputs ("\tba\t", asm_out_file);
4978 assemble_name_raw (asm_out_file, loop_lab);
4979 fputc ('\n', asm_out_file);
4980 xops[1] = GEN_INT (SPARC_STACK_BIAS);
4981 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
4983 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
4988 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
4989 needed. LOW is supposed to be double-word aligned for 32-bit registers.
4990 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
4991 is the action to be performed if SAVE_P returns true and ACTION_FALSE
4992 the action to be performed if it returns false. Return the new offset. */
4994 typedef bool (*sorr_pred_t) (unsigned int, int);
4995 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
4998 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
4999 int offset, int leaf_function, sorr_pred_t save_p,
5000 sorr_act_t action_true, sorr_act_t action_false)
5005 if (TARGET_ARCH64 && high <= 32)
5009 for (i = low; i < high; i++)
5011 if (save_p (i, leaf_function))
5013 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5015 if (action_true == SORR_SAVE)
5017 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5018 RTX_FRAME_RELATED_P (insn) = 1;
5020 else /* action_true == SORR_RESTORE */
5022 /* The frame pointer must be restored last since its old
5023 value may be used as base address for the frame. This
5024 is problematic in 64-bit mode only because of the lack
5025 of double-word load instruction. */
5026 if (i == HARD_FRAME_POINTER_REGNUM)
5029 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5033 else if (action_false == SORR_ADVANCE)
5039 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5040 emit_move_insn (hard_frame_pointer_rtx, mem);
5045 for (i = low; i < high; i += 2)
5047 bool reg0 = save_p (i, leaf_function);
5048 bool reg1 = save_p (i + 1, leaf_function);
5049 enum machine_mode mode;
5054 mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5059 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5064 mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5070 if (action_false == SORR_ADVANCE)
5075 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5076 if (action_true == SORR_SAVE)
5078 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5079 RTX_FRAME_RELATED_P (insn) = 1;
5083 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5085 set1 = gen_rtx_SET (VOIDmode, mem,
5086 gen_rtx_REG (SImode, regno));
5087 RTX_FRAME_RELATED_P (set1) = 1;
5089 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5091 set2 = gen_rtx_SET (VOIDmode, mem,
5092 gen_rtx_REG (SImode, regno + 1));
5093 RTX_FRAME_RELATED_P (set2) = 1;
5094 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5095 gen_rtx_PARALLEL (VOIDmode,
5096 gen_rtvec (2, set1, set2)));
5099 else /* action_true == SORR_RESTORE */
5100 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5102 /* Always preserve double-word alignment. */
5103 offset = (offset + 8) & -8;
5110 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5113 emit_adjust_base_to_offset (rtx base, int offset)
5115 /* ??? This might be optimized a little as %g1 might already have a
5116 value close enough that a single add insn will do. */
5117 /* ??? Although, all of this is probably only a temporary fix because
5118 if %g1 can hold a function result, then sparc_expand_epilogue will
5119 lose (the result will be clobbered). */
5120 rtx new_base = gen_rtx_REG (Pmode, 1);
5121 emit_move_insn (new_base, GEN_INT (offset));
5122 emit_insn (gen_rtx_SET (VOIDmode,
5123 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5127 /* Emit code to save/restore call-saved global and FP registers. */
5130 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5132 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5134 base = emit_adjust_base_to_offset (base, offset);
5139 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5140 save_global_or_fp_reg_p, action, SORR_NONE);
5141 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5142 save_global_or_fp_reg_p, action, SORR_NONE);
5145 /* Emit code to save/restore call-saved local and in registers. */
5148 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5150 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5152 base = emit_adjust_base_to_offset (base, offset);
5156 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5157 save_local_or_in_reg_p, action, SORR_ADVANCE);
5160 /* Emit a window_save insn. */
5163 emit_window_save (rtx increment)
5165 rtx insn = emit_insn (gen_window_save (increment));
5166 RTX_FRAME_RELATED_P (insn) = 1;
5168 /* The incoming return address (%o7) is saved in %i7. */
5169 add_reg_note (insn, REG_CFA_REGISTER,
5170 gen_rtx_SET (VOIDmode,
5171 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5173 INCOMING_RETURN_ADDR_REGNUM)));
5175 /* The window save event. */
5176 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5178 /* The CFA is %fp, the hard frame pointer. */
5179 add_reg_note (insn, REG_CFA_DEF_CFA,
5180 plus_constant (Pmode, hard_frame_pointer_rtx,
5181 INCOMING_FRAME_SP_OFFSET));
5186 /* Generate an increment for the stack pointer. */
5189 gen_stack_pointer_inc (rtx increment)
5191 return gen_rtx_SET (VOIDmode,
5193 gen_rtx_PLUS (Pmode,
5198 /* Expand the function prologue. The prologue is responsible for reserving
5199 storage for the frame, saving the call-saved registers and loading the
5200 GOT register if needed. */
5203 sparc_expand_prologue (void)
5208 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5209 on the final value of the flag means deferring the prologue/epilogue
5210 expansion until just before the second scheduling pass, which is too
5211 late to emit multiple epilogues or return insns.
5213 Of course we are making the assumption that the value of the flag
5214 will not change between now and its final value. Of the three parts
5215 of the formula, only the last one can reasonably vary. Let's take a
5216 closer look, after assuming that the first two ones are set to true
5217 (otherwise the last value is effectively silenced).
5219 If only_leaf_regs_used returns false, the global predicate will also
5220 be false so the actual frame size calculated below will be positive.
5221 As a consequence, the save_register_window insn will be emitted in
5222 the instruction stream; now this insn explicitly references %fp
5223 which is not a leaf register so only_leaf_regs_used will always
5224 return false subsequently.
5226 If only_leaf_regs_used returns true, we hope that the subsequent
5227 optimization passes won't cause non-leaf registers to pop up. For
5228 example, the regrename pass has special provisions to not rename to
5229 non-leaf registers in a leaf function. */
5230 sparc_leaf_function_p
5231 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5233 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5235 if (flag_stack_usage_info)
5236 current_function_static_stack_size = size;
5238 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5239 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5243 else if (sparc_leaf_function_p)
5245 rtx size_int_rtx = GEN_INT (-size);
5248 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5249 else if (size <= 8192)
5251 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5252 RTX_FRAME_RELATED_P (insn) = 1;
5254 /* %sp is still the CFA register. */
5255 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5259 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5260 emit_move_insn (size_rtx, size_int_rtx);
5261 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5262 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5263 gen_stack_pointer_inc (size_int_rtx));
5266 RTX_FRAME_RELATED_P (insn) = 1;
5270 rtx size_int_rtx = GEN_INT (-size);
5273 emit_window_save (size_int_rtx);
5274 else if (size <= 8192)
5276 emit_window_save (GEN_INT (-4096));
5278 /* %sp is not the CFA register anymore. */
5279 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5281 /* Make sure no %fp-based store is issued until after the frame is
5282 established. The offset between the frame pointer and the stack
5283 pointer is calculated relative to the value of the stack pointer
5284 at the end of the function prologue, and moving instructions that
5285 access the stack via the frame pointer between the instructions
5286 that decrement the stack pointer could result in accessing the
5287 register window save area, which is volatile. */
5288 emit_insn (gen_frame_blockage ());
5292 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5293 emit_move_insn (size_rtx, size_int_rtx);
5294 emit_window_save (size_rtx);
5298 if (sparc_leaf_function_p)
5300 sparc_frame_base_reg = stack_pointer_rtx;
5301 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5305 sparc_frame_base_reg = hard_frame_pointer_rtx;
5306 sparc_frame_base_offset = SPARC_STACK_BIAS;
5309 if (sparc_n_global_fp_regs > 0)
5310 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5311 sparc_frame_base_offset
5312 - sparc_apparent_frame_size,
5315 /* Load the GOT register if needed. */
5316 if (crtl->uses_pic_offset_table)
5317 load_got_register ();
5319 /* Advertise that the data calculated just above are now valid. */
5320 sparc_prologue_data_valid_p = true;
5323 /* Expand the function prologue. The prologue is responsible for reserving
5324 storage for the frame, saving the call-saved registers and loading the
5325 GOT register if needed. */
5328 sparc_flat_expand_prologue (void)
5333 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5335 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5337 if (flag_stack_usage_info)
5338 current_function_static_stack_size = size;
5340 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
5341 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5343 if (sparc_save_local_in_regs_p)
5344 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5351 rtx size_int_rtx, size_rtx;
5353 size_rtx = size_int_rtx = GEN_INT (-size);
5355 /* We establish the frame (i.e. decrement the stack pointer) first, even
5356 if we use a frame pointer, because we cannot clobber any call-saved
5357 registers, including the frame pointer, if we haven't created a new
5358 register save area, for the sake of compatibility with the ABI. */
5360 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5361 else if (size <= 8192 && !frame_pointer_needed)
5363 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5364 RTX_FRAME_RELATED_P (insn) = 1;
5365 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5369 size_rtx = gen_rtx_REG (Pmode, 1);
5370 emit_move_insn (size_rtx, size_int_rtx);
5371 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5372 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5373 gen_stack_pointer_inc (size_int_rtx));
5375 RTX_FRAME_RELATED_P (insn) = 1;
5377 /* Ensure nothing is scheduled until after the frame is established. */
5378 emit_insn (gen_blockage ());
5380 if (frame_pointer_needed)
5382 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5383 gen_rtx_MINUS (Pmode,
5386 RTX_FRAME_RELATED_P (insn) = 1;
5388 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5389 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5390 plus_constant (Pmode, stack_pointer_rtx,
5394 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5396 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5397 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5399 insn = emit_move_insn (i7, o7);
5400 RTX_FRAME_RELATED_P (insn) = 1;
5402 add_reg_note (insn, REG_CFA_REGISTER,
5403 gen_rtx_SET (VOIDmode, i7, o7));
5405 /* Prevent this instruction from ever being considered dead,
5406 even if this function has no epilogue. */
5411 if (frame_pointer_needed)
5413 sparc_frame_base_reg = hard_frame_pointer_rtx;
5414 sparc_frame_base_offset = SPARC_STACK_BIAS;
5418 sparc_frame_base_reg = stack_pointer_rtx;
5419 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5422 if (sparc_n_global_fp_regs > 0)
5423 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5424 sparc_frame_base_offset
5425 - sparc_apparent_frame_size,
5428 /* Load the GOT register if needed. */
5429 if (crtl->uses_pic_offset_table)
5430 load_got_register ();
5432 /* Advertise that the data calculated just above are now valid. */
5433 sparc_prologue_data_valid_p = true;
5436 /* This function generates the assembly code for function entry, which boils
5437 down to emitting the necessary .register directives. */
5440 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5442 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5444 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5446 sparc_output_scratch_registers (file);
5449 /* Expand the function epilogue, either normal or part of a sibcall.
5450 We emit all the instructions except the return or the call. */
5453 sparc_expand_epilogue (bool for_eh)
5455 HOST_WIDE_INT size = sparc_frame_size;
5457 if (sparc_n_global_fp_regs > 0)
5458 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5459 sparc_frame_base_offset
5460 - sparc_apparent_frame_size,
5463 if (size == 0 || for_eh)
5465 else if (sparc_leaf_function_p)
5468 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5469 else if (size <= 8192)
5471 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5472 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5476 rtx reg = gen_rtx_REG (Pmode, 1);
5477 emit_move_insn (reg, GEN_INT (size));
5478 emit_insn (gen_stack_pointer_inc (reg));
5483 /* Expand the function epilogue, either normal or part of a sibcall.
5484 We emit all the instructions except the return or the call. */
5487 sparc_flat_expand_epilogue (bool for_eh)
5489 HOST_WIDE_INT size = sparc_frame_size;
5491 if (sparc_n_global_fp_regs > 0)
5492 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5493 sparc_frame_base_offset
5494 - sparc_apparent_frame_size,
5497 /* If we have a frame pointer, we'll need both to restore it before the
5498 frame is destroyed and use its current value in destroying the frame.
5499 Since we don't have an atomic way to do that in the flat window model,
5500 we save the current value into a temporary register (%g1). */
5501 if (frame_pointer_needed && !for_eh)
5502 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5504 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5505 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5506 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5508 if (sparc_save_local_in_regs_p)
5509 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5510 sparc_frame_base_offset,
5513 if (size == 0 || for_eh)
5515 else if (frame_pointer_needed)
5517 /* Make sure the frame is destroyed after everything else is done. */
5518 emit_insn (gen_blockage ());
5520 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5525 emit_insn (gen_blockage ());
5528 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5529 else if (size <= 8192)
5531 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5532 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5536 rtx reg = gen_rtx_REG (Pmode, 1);
5537 emit_move_insn (reg, GEN_INT (size));
5538 emit_insn (gen_stack_pointer_inc (reg));
5543 /* Return true if it is appropriate to emit `return' instructions in the
5544 body of a function. */
5547 sparc_can_use_return_insn_p (void)
5549 return sparc_prologue_data_valid_p
5550 && sparc_n_global_fp_regs == 0
5552 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5553 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5556 /* This function generates the assembly code for function exit. */
5559 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5561 /* If the last two instructions of a function are "call foo; dslot;"
5562 the return address might point to the first instruction in the next
5563 function and we have to output a dummy nop for the sake of sane
5564 backtraces in such cases. This is pointless for sibling calls since
5565 the return address is explicitly adjusted. */
5567 rtx insn, last_real_insn;
5569 insn = get_last_insn ();
5571 last_real_insn = prev_real_insn (insn);
5573 && NONJUMP_INSN_P (last_real_insn)
5574 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5575 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5578 && CALL_P (last_real_insn)
5579 && !SIBLING_CALL_P (last_real_insn))
5580 fputs("\tnop\n", file);
5582 sparc_output_deferred_case_vectors ();
5585 /* Output a 'restore' instruction. */
5588 output_restore (rtx pat)
5594 fputs ("\t restore\n", asm_out_file);
5598 gcc_assert (GET_CODE (pat) == SET);
5600 operands[0] = SET_DEST (pat);
5601 pat = SET_SRC (pat);
5603 switch (GET_CODE (pat))
5606 operands[1] = XEXP (pat, 0);
5607 operands[2] = XEXP (pat, 1);
5608 output_asm_insn (" restore %r1, %2, %Y0", operands);
5611 operands[1] = XEXP (pat, 0);
5612 operands[2] = XEXP (pat, 1);
5613 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5616 operands[1] = XEXP (pat, 0);
5617 gcc_assert (XEXP (pat, 1) == const1_rtx);
5618 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5622 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5627 /* Output a return. */
5630 output_return (rtx insn)
5632 if (crtl->calls_eh_return)
5634 /* If the function uses __builtin_eh_return, the eh_return
5635 machinery occupies the delay slot. */
5636 gcc_assert (!final_sequence);
5638 if (flag_delayed_branch)
5640 if (!TARGET_FLAT && TARGET_V9)
5641 fputs ("\treturn\t%i7+8\n", asm_out_file);
5645 fputs ("\trestore\n", asm_out_file);
5647 fputs ("\tjmp\t%o7+8\n", asm_out_file);
5650 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5655 fputs ("\trestore\n", asm_out_file);
5657 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5658 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5661 else if (sparc_leaf_function_p || TARGET_FLAT)
5663 /* This is a leaf or flat function so we don't have to bother restoring
5664 the register window, which frees us from dealing with the convoluted
5665 semantics of restore/return. We simply output the jump to the
5666 return address and the insn in the delay slot (if any). */
5668 return "jmp\t%%o7+%)%#";
5672 /* This is a regular function so we have to restore the register window.
5673 We may have a pending insn for the delay slot, which will be either
5674 combined with the 'restore' instruction or put in the delay slot of
5675 the 'return' instruction. */
5681 delay = NEXT_INSN (insn);
5684 pat = PATTERN (delay);
5686 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5688 epilogue_renumber (&pat, 0);
5689 return "return\t%%i7+%)%#";
5693 output_asm_insn ("jmp\t%%i7+%)", NULL);
5694 output_restore (pat);
5695 PATTERN (delay) = gen_blockage ();
5696 INSN_CODE (delay) = -1;
5701 /* The delay slot is empty. */
5703 return "return\t%%i7+%)\n\t nop";
5704 else if (flag_delayed_branch)
5705 return "jmp\t%%i7+%)\n\t restore";
5707 return "restore\n\tjmp\t%%o7+%)\n\t nop";
5714 /* Output a sibling call. */
5717 output_sibcall (rtx insn, rtx call_operand)
5721 gcc_assert (flag_delayed_branch);
5723 operands[0] = call_operand;
5725 if (sparc_leaf_function_p || TARGET_FLAT)
5727 /* This is a leaf or flat function so we don't have to bother restoring
5728 the register window. We simply output the jump to the function and
5729 the insn in the delay slot (if any). */
5731 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
5734 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
5737 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
5738 it into branch if possible. */
5739 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
5744 /* This is a regular function so we have to restore the register window.
5745 We may have a pending insn for the delay slot, which will be combined
5746 with the 'restore' instruction. */
5748 output_asm_insn ("call\t%a0, 0", operands);
5752 rtx delay = NEXT_INSN (insn);
5755 output_restore (PATTERN (delay));
5757 PATTERN (delay) = gen_blockage ();
5758 INSN_CODE (delay) = -1;
5761 output_restore (NULL_RTX);
5767 /* Functions for handling argument passing.
5769 For 32-bit, the first 6 args are normally in registers and the rest are
5770 pushed. Any arg that starts within the first 6 words is at least
5771 partially passed in a register unless its data type forbids.
5773 For 64-bit, the argument registers are laid out as an array of 16 elements
5774 and arguments are added sequentially. The first 6 int args and up to the
5775 first 16 fp args (depending on size) are passed in regs.
5777 Slot Stack Integral Float Float in structure Double Long Double
5778 ---- ----- -------- ----- ------------------ ------ -----------
5779 15 [SP+248] %f31 %f30,%f31 %d30
5780 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5781 13 [SP+232] %f27 %f26,%f27 %d26
5782 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5783 11 [SP+216] %f23 %f22,%f23 %d22
5784 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5785 9 [SP+200] %f19 %f18,%f19 %d18
5786 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5787 7 [SP+184] %f15 %f14,%f15 %d14
5788 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5789 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5790 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5791 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5792 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5793 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5794 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5796 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5798 Integral arguments are always passed as 64-bit quantities appropriately
5801 Passing of floating point values is handled as follows.
5802 If a prototype is in scope:
5803 If the value is in a named argument (i.e. not a stdarg function or a
5804 value not part of the `...') then the value is passed in the appropriate
5806 If the value is part of the `...' and is passed in one of the first 6
5807 slots then the value is passed in the appropriate int reg.
5808 If the value is part of the `...' and is not passed in one of the first 6
5809 slots then the value is passed in memory.
5810 If a prototype is not in scope:
5811 If the value is one of the first 6 arguments the value is passed in the
5812 appropriate integer reg and the appropriate fp reg.
5813 If the value is not one of the first 6 arguments the value is passed in
5814 the appropriate fp reg and in memory.
5817 Summary of the calling conventions implemented by GCC on the SPARC:
5820 size argument return value
5822 small integer <4 int. reg. int. reg.
5823 word 4 int. reg. int. reg.
5824 double word 8 int. reg. int. reg.
5826 _Complex small integer <8 int. reg. int. reg.
5827 _Complex word 8 int. reg. int. reg.
5828 _Complex double word 16 memory int. reg.
5830 vector integer <=8 int. reg. FP reg.
5831 vector integer >8 memory memory
5833 float 4 int. reg. FP reg.
5834 double 8 int. reg. FP reg.
5835 long double 16 memory memory
5837 _Complex float 8 memory FP reg.
5838 _Complex double 16 memory FP reg.
5839 _Complex long double 32 memory FP reg.
5841 vector float any memory memory
5843 aggregate any memory memory
5848 size argument return value
5850 small integer <8 int. reg. int. reg.
5851 word 8 int. reg. int. reg.
5852 double word 16 int. reg. int. reg.
5854 _Complex small integer <16 int. reg. int. reg.
5855 _Complex word 16 int. reg. int. reg.
5856 _Complex double word 32 memory int. reg.
5858 vector integer <=16 FP reg. FP reg.
5859 vector integer 16<s<=32 memory FP reg.
5860 vector integer >32 memory memory
5862 float 4 FP reg. FP reg.
5863 double 8 FP reg. FP reg.
5864 long double 16 FP reg. FP reg.
5866 _Complex float 8 FP reg. FP reg.
5867 _Complex double 16 FP reg. FP reg.
5868 _Complex long double 32 memory FP reg.
5870 vector float <=16 FP reg. FP reg.
5871 vector float 16<s<=32 memory FP reg.
5872 vector float >32 memory memory
5874 aggregate <=16 reg. reg.
5875 aggregate 16<s<=32 memory reg.
5876 aggregate >32 memory memory
5880 Note #1: complex floating-point types follow the extended SPARC ABIs as
5881 implemented by the Sun compiler.
5883 Note #2: integral vector types follow the scalar floating-point types
5884 conventions to match what is implemented by the Sun VIS SDK.
5886 Note #3: floating-point vector types follow the aggregate types
5890 /* Maximum number of int regs for args. */
5891 #define SPARC_INT_ARG_MAX 6
5892 /* Maximum number of fp regs for args. */
5893 #define SPARC_FP_ARG_MAX 16
5895 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
5897 /* Handle the INIT_CUMULATIVE_ARGS macro.
5898 Initialize a variable CUM of type CUMULATIVE_ARGS
5899 for a call to a function whose data type is FNTYPE.
5900 For a library call, FNTYPE is 0. */
5903 init_cumulative_args (struct sparc_args *cum, tree fntype,
5904 rtx libname ATTRIBUTE_UNUSED,
5905 tree fndecl ATTRIBUTE_UNUSED)
5908 cum->prototype_p = fntype && prototype_p (fntype);
5909 cum->libcall_p = fntype == 0;
5912 /* Handle promotion of pointer and integer arguments. */
5914 static enum machine_mode
5915 sparc_promote_function_mode (const_tree type,
5916 enum machine_mode mode,
5918 const_tree fntype ATTRIBUTE_UNUSED,
5919 int for_return ATTRIBUTE_UNUSED)
5921 if (type != NULL_TREE && POINTER_TYPE_P (type))
5923 *punsignedp = POINTERS_EXTEND_UNSIGNED;
5927 /* Integral arguments are passed as full words, as per the ABI. */
5928 if (GET_MODE_CLASS (mode) == MODE_INT
5929 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5935 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
5938 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
5940 return TARGET_ARCH64 ? true : false;
5943 /* Scan the record type TYPE and return the following predicates:
5944 - INTREGS_P: the record contains at least one field or sub-field
5945 that is eligible for promotion in integer registers.
5946 - FP_REGS_P: the record contains at least one field or sub-field
5947 that is eligible for promotion in floating-point registers.
5948 - PACKED_P: the record contains at least one field that is packed.
5950 Sub-fields are not taken into account for the PACKED_P predicate. */
5953 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
5958 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5960 if (TREE_CODE (field) == FIELD_DECL)
5962 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
5963 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
5964 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
5965 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5971 if (packed_p && DECL_PACKED (field))
5977 /* Compute the slot number to pass an argument in.
5978 Return the slot number or -1 if passing on the stack.
5980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5981 the preceding args and about the function being called.
5982 MODE is the argument's machine mode.
5983 TYPE is the data type of the argument (as a tree).
5984 This is null for libcalls where that information may
5986 NAMED is nonzero if this argument is a named parameter
5987 (otherwise it is an extra parameter matching an ellipsis).
5988 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
5989 *PREGNO records the register number to use if scalar type.
5990 *PPADDING records the amount of padding needed in words. */
5993 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
5994 const_tree type, bool named, bool incoming_p,
5995 int *pregno, int *ppadding)
5997 int regbase = (incoming_p
5998 ? SPARC_INCOMING_INT_ARG_FIRST
5999 : SPARC_OUTGOING_INT_ARG_FIRST);
6000 int slotno = cum->words;
6001 enum mode_class mclass;
6006 if (type && TREE_ADDRESSABLE (type))
6012 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6015 /* For SPARC64, objects requiring 16-byte alignment get it. */
6017 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6018 && (slotno & 1) != 0)
6019 slotno++, *ppadding = 1;
6021 mclass = GET_MODE_CLASS (mode);
6022 if (type && TREE_CODE (type) == VECTOR_TYPE)
6024 /* Vector types deserve special treatment because they are
6025 polymorphic wrt their mode, depending upon whether VIS
6026 instructions are enabled. */
6027 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6029 /* The SPARC port defines no floating-point vector modes. */
6030 gcc_assert (mode == BLKmode);
6034 /* Integral vector types should either have a vector
6035 mode or an integral mode, because we are guaranteed
6036 by pass_by_reference that their size is not greater
6037 than 16 bytes and TImode is 16-byte wide. */
6038 gcc_assert (mode != BLKmode);
6040 /* Vector integers are handled like floats according to
6042 mclass = MODE_FLOAT;
6049 case MODE_COMPLEX_FLOAT:
6050 case MODE_VECTOR_INT:
6051 if (TARGET_ARCH64 && TARGET_FPU && named)
6053 if (slotno >= SPARC_FP_ARG_MAX)
6055 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6056 /* Arguments filling only one single FP register are
6057 right-justified in the outer double FP register. */
6058 if (GET_MODE_SIZE (mode) <= 4)
6065 case MODE_COMPLEX_INT:
6066 if (slotno >= SPARC_INT_ARG_MAX)
6068 regno = regbase + slotno;
6072 if (mode == VOIDmode)
6073 /* MODE is VOIDmode when generating the actual call. */
6076 gcc_assert (mode == BLKmode);
6080 || (TREE_CODE (type) != VECTOR_TYPE
6081 && TREE_CODE (type) != RECORD_TYPE))
6083 if (slotno >= SPARC_INT_ARG_MAX)
6085 regno = regbase + slotno;
6087 else /* TARGET_ARCH64 && type */
6089 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6091 /* First see what kinds of registers we would need. */
6092 if (TREE_CODE (type) == VECTOR_TYPE)
6095 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6097 /* The ABI obviously doesn't specify how packed structures
6098 are passed. These are defined to be passed in int regs
6099 if possible, otherwise memory. */
6100 if (packed_p || !named)
6101 fpregs_p = 0, intregs_p = 1;
6103 /* If all arg slots are filled, then must pass on stack. */
6104 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6107 /* If there are only int args and all int arg slots are filled,
6108 then must pass on stack. */
6109 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6112 /* Note that even if all int arg slots are filled, fp members may
6113 still be passed in regs if such regs are available.
6114 *PREGNO isn't set because there may be more than one, it's up
6115 to the caller to compute them. */
6128 /* Handle recursive register counting for structure field layout. */
6130 struct function_arg_record_value_parms
6132 rtx ret; /* return expression being built. */
6133 int slotno; /* slot number of the argument. */
6134 int named; /* whether the argument is named. */
6135 int regbase; /* regno of the base register. */
6136 int stack; /* 1 if part of the argument is on the stack. */
6137 int intoffset; /* offset of the first pending integer field. */
6138 unsigned int nregs; /* number of words passed in registers. */
6141 static void function_arg_record_value_3
6142 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6143 static void function_arg_record_value_2
6144 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6145 static void function_arg_record_value_1
6146 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6147 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6148 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6150 /* A subroutine of function_arg_record_value. Traverse the structure
6151 recursively and determine how many registers will be required. */
6154 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6155 struct function_arg_record_value_parms *parms,
6160 /* We need to compute how many registers are needed so we can
6161 allocate the PARALLEL but before we can do that we need to know
6162 whether there are any packed fields. The ABI obviously doesn't
6163 specify how structures are passed in this case, so they are
6164 defined to be passed in int regs if possible, otherwise memory,
6165 regardless of whether there are fp values present. */
6168 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6170 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6177 /* Compute how many registers we need. */
6178 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6180 if (TREE_CODE (field) == FIELD_DECL)
6182 HOST_WIDE_INT bitpos = startbitpos;
6184 if (DECL_SIZE (field) != 0)
6186 if (integer_zerop (DECL_SIZE (field)))
6189 if (host_integerp (bit_position (field), 1))
6190 bitpos += int_bit_position (field);
6193 /* ??? FIXME: else assume zero offset. */
6195 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6196 function_arg_record_value_1 (TREE_TYPE (field),
6200 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6201 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6206 if (parms->intoffset != -1)
6208 unsigned int startbit, endbit;
6209 int intslots, this_slotno;
6211 startbit = parms->intoffset & -BITS_PER_WORD;
6212 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6214 intslots = (endbit - startbit) / BITS_PER_WORD;
6215 this_slotno = parms->slotno + parms->intoffset
6218 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6220 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6221 /* We need to pass this field on the stack. */
6225 parms->nregs += intslots;
6226 parms->intoffset = -1;
6229 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
6230 If it wasn't true we wouldn't be here. */
6231 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6232 && DECL_MODE (field) == BLKmode)
6233 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6234 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6241 if (parms->intoffset == -1)
6242 parms->intoffset = bitpos;
6248 /* A subroutine of function_arg_record_value. Assign the bits of the
6249 structure between parms->intoffset and bitpos to integer registers. */
6252 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6253 struct function_arg_record_value_parms *parms)
6255 enum machine_mode mode;
6257 unsigned int startbit, endbit;
6258 int this_slotno, intslots, intoffset;
6261 if (parms->intoffset == -1)
6264 intoffset = parms->intoffset;
6265 parms->intoffset = -1;
6267 startbit = intoffset & -BITS_PER_WORD;
6268 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6269 intslots = (endbit - startbit) / BITS_PER_WORD;
6270 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6272 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6276 /* If this is the trailing part of a word, only load that much into
6277 the register. Otherwise load the whole register. Note that in
6278 the latter case we may pick up unwanted bits. It's not a problem
6279 at the moment but may wish to revisit. */
6281 if (intoffset % BITS_PER_WORD != 0)
6282 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6287 intoffset /= BITS_PER_UNIT;
6290 regno = parms->regbase + this_slotno;
6291 reg = gen_rtx_REG (mode, regno);
6292 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6293 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6296 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6301 while (intslots > 0);
6304 /* A subroutine of function_arg_record_value. Traverse the structure
6305 recursively and assign bits to floating point registers. Track which
6306 bits in between need integer registers; invoke function_arg_record_value_3
6307 to make that happen. */
6310 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6311 struct function_arg_record_value_parms *parms,
6317 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6319 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6326 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6328 if (TREE_CODE (field) == FIELD_DECL)
6330 HOST_WIDE_INT bitpos = startbitpos;
6332 if (DECL_SIZE (field) != 0)
6334 if (integer_zerop (DECL_SIZE (field)))
6337 if (host_integerp (bit_position (field), 1))
6338 bitpos += int_bit_position (field);
6341 /* ??? FIXME: else assume zero offset. */
6343 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6344 function_arg_record_value_2 (TREE_TYPE (field),
6348 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6349 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6354 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6355 int regno, nregs, pos;
6356 enum machine_mode mode = DECL_MODE (field);
6359 function_arg_record_value_3 (bitpos, parms);
6361 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6364 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6365 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6367 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6369 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6375 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6376 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6378 reg = gen_rtx_REG (mode, regno);
6379 pos = bitpos / BITS_PER_UNIT;
6380 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6381 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6385 regno += GET_MODE_SIZE (mode) / 4;
6386 reg = gen_rtx_REG (mode, regno);
6387 pos += GET_MODE_SIZE (mode);
6388 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6389 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6395 if (parms->intoffset == -1)
6396 parms->intoffset = bitpos;
6402 /* Used by function_arg and sparc_function_value_1 to implement the complex
6403 conventions of the 64-bit ABI for passing and returning structures.
6404 Return an expression valid as a return value for the FUNCTION_ARG
6405 and TARGET_FUNCTION_VALUE.
6407 TYPE is the data type of the argument (as a tree).
6408 This is null for libcalls where that information may
6410 MODE is the argument's machine mode.
6411 SLOTNO is the index number of the argument's slot in the parameter array.
6412 NAMED is nonzero if this argument is a named parameter
6413 (otherwise it is an extra parameter matching an ellipsis).
6414 REGBASE is the regno of the base register for the parameter array. */
6417 function_arg_record_value (const_tree type, enum machine_mode mode,
6418 int slotno, int named, int regbase)
6420 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6421 struct function_arg_record_value_parms parms;
6424 parms.ret = NULL_RTX;
6425 parms.slotno = slotno;
6426 parms.named = named;
6427 parms.regbase = regbase;
6430 /* Compute how many registers we need. */
6432 parms.intoffset = 0;
6433 function_arg_record_value_1 (type, 0, &parms, false);
6435 /* Take into account pending integer fields. */
6436 if (parms.intoffset != -1)
6438 unsigned int startbit, endbit;
6439 int intslots, this_slotno;
6441 startbit = parms.intoffset & -BITS_PER_WORD;
6442 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6443 intslots = (endbit - startbit) / BITS_PER_WORD;
6444 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6446 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6448 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6449 /* We need to pass this field on the stack. */
6453 parms.nregs += intslots;
6455 nregs = parms.nregs;
6457 /* Allocate the vector and handle some annoying special cases. */
6460 /* ??? Empty structure has no value? Duh? */
6463 /* Though there's nothing really to store, return a word register
6464 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6465 leads to breakage due to the fact that there are zero bytes to
6467 return gen_rtx_REG (mode, regbase);
6471 /* ??? C++ has structures with no fields, and yet a size. Give up
6472 for now and pass everything back in integer registers. */
6473 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6475 if (nregs + slotno > SPARC_INT_ARG_MAX)
6476 nregs = SPARC_INT_ARG_MAX - slotno;
6478 gcc_assert (nregs != 0);
6480 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6482 /* If at least one field must be passed on the stack, generate
6483 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6484 also be passed on the stack. We can't do much better because the
6485 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6486 of structures for which the fields passed exclusively in registers
6487 are not at the beginning of the structure. */
6489 XVECEXP (parms.ret, 0, 0)
6490 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6492 /* Fill in the entries. */
6494 parms.intoffset = 0;
6495 function_arg_record_value_2 (type, 0, &parms, false);
6496 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6498 gcc_assert (parms.nregs == nregs);
6503 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6504 of the 64-bit ABI for passing and returning unions.
6505 Return an expression valid as a return value for the FUNCTION_ARG
6506 and TARGET_FUNCTION_VALUE.
6508 SIZE is the size in bytes of the union.
6509 MODE is the argument's machine mode.
6510 REGNO is the hard register the union will be passed in. */
6513 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6516 int nwords = ROUND_ADVANCE (size), i;
6519 /* See comment in previous function for empty structures. */
6521 return gen_rtx_REG (mode, regno);
6523 if (slotno == SPARC_INT_ARG_MAX - 1)
6526 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6528 for (i = 0; i < nwords; i++)
6530 /* Unions are passed left-justified. */
6531 XVECEXP (regs, 0, i)
6532 = gen_rtx_EXPR_LIST (VOIDmode,
6533 gen_rtx_REG (word_mode, regno),
6534 GEN_INT (UNITS_PER_WORD * i));
6541 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6542 for passing and returning large (BLKmode) vectors.
6543 Return an expression valid as a return value for the FUNCTION_ARG
6544 and TARGET_FUNCTION_VALUE.
6546 SIZE is the size in bytes of the vector (at least 8 bytes).
6547 REGNO is the FP hard register the vector will be passed in. */
6550 function_arg_vector_value (int size, int regno)
6552 int i, nregs = size / 8;
6555 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6557 for (i = 0; i < nregs; i++)
6559 XVECEXP (regs, 0, i)
6560 = gen_rtx_EXPR_LIST (VOIDmode,
6561 gen_rtx_REG (DImode, regno + 2*i),
6568 /* Determine where to put an argument to a function.
6569 Value is zero to push the argument on the stack,
6570 or a hard register in which to store the argument.
6572 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6573 the preceding args and about the function being called.
6574 MODE is the argument's machine mode.
6575 TYPE is the data type of the argument (as a tree).
6576 This is null for libcalls where that information may
6578 NAMED is true if this argument is a named parameter
6579 (otherwise it is an extra parameter matching an ellipsis).
6580 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6581 TARGET_FUNCTION_INCOMING_ARG. */
6584 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6585 const_tree type, bool named, bool incoming_p)
6587 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6589 int regbase = (incoming_p
6590 ? SPARC_INCOMING_INT_ARG_FIRST
6591 : SPARC_OUTGOING_INT_ARG_FIRST);
6592 int slotno, regno, padding;
6593 enum mode_class mclass = GET_MODE_CLASS (mode);
6595 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6600 /* Vector types deserve special treatment because they are polymorphic wrt
6601 their mode, depending upon whether VIS instructions are enabled. */
6602 if (type && TREE_CODE (type) == VECTOR_TYPE)
6604 HOST_WIDE_INT size = int_size_in_bytes (type);
6605 gcc_assert ((TARGET_ARCH32 && size <= 8)
6606 || (TARGET_ARCH64 && size <= 16));
6608 if (mode == BLKmode)
6609 return function_arg_vector_value (size,
6610 SPARC_FP_ARG_FIRST + 2*slotno);
6612 mclass = MODE_FLOAT;
6616 return gen_rtx_REG (mode, regno);
6618 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6619 and are promoted to registers if possible. */
6620 if (type && TREE_CODE (type) == RECORD_TYPE)
6622 HOST_WIDE_INT size = int_size_in_bytes (type);
6623 gcc_assert (size <= 16);
6625 return function_arg_record_value (type, mode, slotno, named, regbase);
6628 /* Unions up to 16 bytes in size are passed in integer registers. */
6629 else if (type && TREE_CODE (type) == UNION_TYPE)
6631 HOST_WIDE_INT size = int_size_in_bytes (type);
6632 gcc_assert (size <= 16);
6634 return function_arg_union_value (size, mode, slotno, regno);
6637 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6638 but also have the slot allocated for them.
6639 If no prototype is in scope fp values in register slots get passed
6640 in two places, either fp regs and int regs or fp regs and memory. */
6641 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6642 && SPARC_FP_REG_P (regno))
6644 rtx reg = gen_rtx_REG (mode, regno);
6645 if (cum->prototype_p || cum->libcall_p)
6647 /* "* 2" because fp reg numbers are recorded in 4 byte
6650 /* ??? This will cause the value to be passed in the fp reg and
6651 in the stack. When a prototype exists we want to pass the
6652 value in the reg but reserve space on the stack. That's an
6653 optimization, and is deferred [for a bit]. */
6654 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6655 return gen_rtx_PARALLEL (mode,
6657 gen_rtx_EXPR_LIST (VOIDmode,
6658 NULL_RTX, const0_rtx),
6659 gen_rtx_EXPR_LIST (VOIDmode,
6663 /* ??? It seems that passing back a register even when past
6664 the area declared by REG_PARM_STACK_SPACE will allocate
6665 space appropriately, and will not copy the data onto the
6666 stack, exactly as we desire.
6668 This is due to locate_and_pad_parm being called in
6669 expand_call whenever reg_parm_stack_space > 0, which
6670 while beneficial to our example here, would seem to be
6671 in error from what had been intended. Ho hum... -- r~ */
6679 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6683 /* On incoming, we don't need to know that the value
6684 is passed in %f0 and %i0, and it confuses other parts
6685 causing needless spillage even on the simplest cases. */
6689 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6690 + (regno - SPARC_FP_ARG_FIRST) / 2);
6692 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6693 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6695 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6699 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6700 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6701 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6706 /* All other aggregate types are passed in an integer register in a mode
6707 corresponding to the size of the type. */
6708 else if (type && AGGREGATE_TYPE_P (type))
6710 HOST_WIDE_INT size = int_size_in_bytes (type);
6711 gcc_assert (size <= 16);
6713 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6716 return gen_rtx_REG (mode, regno);
6719 /* Handle the TARGET_FUNCTION_ARG target hook. */
6722 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6723 const_tree type, bool named)
6725 return sparc_function_arg_1 (cum, mode, type, named, false);
6728 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6731 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6732 const_tree type, bool named)
6734 return sparc_function_arg_1 (cum, mode, type, named, true);
6737 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6740 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6742 return ((TARGET_ARCH64
6743 && (GET_MODE_ALIGNMENT (mode) == 128
6744 || (type && TYPE_ALIGN (type) == 128)))
6749 /* For an arg passed partly in registers and partly in memory,
6750 this is the number of bytes of registers used.
6751 For args passed entirely in registers or entirely in memory, zero.
6753 Any arg that starts in the first 6 regs but won't entirely fit in them
6754 needs partial registers on v8. On v9, structures with integer
6755 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6756 values that begin in the last fp reg [where "last fp reg" varies with the
6757 mode] will be split between that reg and memory. */
6760 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
6761 tree type, bool named)
6763 int slotno, regno, padding;
6765 /* We pass false for incoming_p here, it doesn't matter. */
6766 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
6767 false, ®no, &padding);
6774 if ((slotno + (mode == BLKmode
6775 ? ROUND_ADVANCE (int_size_in_bytes (type))
6776 : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
6777 > SPARC_INT_ARG_MAX)
6778 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
6782 /* We are guaranteed by pass_by_reference that the size of the
6783 argument is not greater than 16 bytes, so we only need to return
6784 one word if the argument is partially passed in registers. */
6786 if (type && AGGREGATE_TYPE_P (type))
6788 int size = int_size_in_bytes (type);
6790 if (size > UNITS_PER_WORD
6791 && slotno == SPARC_INT_ARG_MAX - 1)
6792 return UNITS_PER_WORD;
6794 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
6795 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6796 && ! (TARGET_FPU && named)))
6798 /* The complex types are passed as packed types. */
6799 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6800 && slotno == SPARC_INT_ARG_MAX - 1)
6801 return UNITS_PER_WORD;
6803 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6805 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
6807 return UNITS_PER_WORD;
6814 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6815 Specify whether to pass the argument by reference. */
6818 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6819 enum machine_mode mode, const_tree type,
6820 bool named ATTRIBUTE_UNUSED)
6823 /* Original SPARC 32-bit ABI says that structures and unions,
6824 and quad-precision floats are passed by reference. For Pascal,
6825 also pass arrays by reference. All other base types are passed
6828 Extended ABI (as implemented by the Sun compiler) says that all
6829 complex floats are passed by reference. Pass complex integers
6830 in registers up to 8 bytes. More generally, enforce the 2-word
6831 cap for passing arguments in registers.
6833 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6834 integers are passed like floats of the same size, that is in
6835 registers up to 8 bytes. Pass all vector floats by reference
6836 like structure and unions. */
6837 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6839 /* Catch CDImode, TFmode, DCmode and TCmode. */
6840 || GET_MODE_SIZE (mode) > 8
6842 && TREE_CODE (type) == VECTOR_TYPE
6843 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6845 /* Original SPARC 64-bit ABI says that structures and unions
6846 smaller than 16 bytes are passed in registers, as well as
6847 all other base types.
6849 Extended ABI (as implemented by the Sun compiler) says that
6850 complex floats are passed in registers up to 16 bytes. Pass
6851 all complex integers in registers up to 16 bytes. More generally,
6852 enforce the 2-word cap for passing arguments in registers.
6854 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6855 integers are passed like floats of the same size, that is in
6856 registers (up to 16 bytes). Pass all vector floats like structure
6859 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
6860 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6861 /* Catch CTImode and TCmode. */
6862 || GET_MODE_SIZE (mode) > 16);
6865 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
6866 Update the data in CUM to advance over an argument
6867 of mode MODE and data type TYPE.
6868 TYPE is null for libcalls where that information may not be available. */
6871 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6872 const_tree type, bool named)
6874 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6877 /* We pass false for incoming_p here, it doesn't matter. */
6878 function_arg_slotno (cum, mode, type, named, false, ®no, &padding);
6880 /* If argument requires leading padding, add it. */
6881 cum->words += padding;
6885 cum->words += (mode != BLKmode
6886 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6887 : ROUND_ADVANCE (int_size_in_bytes (type)));
6891 if (type && AGGREGATE_TYPE_P (type))
6893 int size = int_size_in_bytes (type);
6897 else if (size <= 16)
6899 else /* passed by reference */
6904 cum->words += (mode != BLKmode
6905 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6906 : ROUND_ADVANCE (int_size_in_bytes (type)));
6911 /* Handle the FUNCTION_ARG_PADDING macro.
6912 For the 64 bit ABI structs are always stored left shifted in their
6916 function_arg_padding (enum machine_mode mode, const_tree type)
6918 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
6921 /* Fall back to the default. */
6922 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
6925 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
6926 Specify whether to return the return value in memory. */
6929 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6932 /* Original SPARC 32-bit ABI says that structures and unions,
6933 and quad-precision floats are returned in memory. All other
6934 base types are returned in registers.
6936 Extended ABI (as implemented by the Sun compiler) says that
6937 all complex floats are returned in registers (8 FP registers
6938 at most for '_Complex long double'). Return all complex integers
6939 in registers (4 at most for '_Complex long long').
6941 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6942 integers are returned like floats of the same size, that is in
6943 registers up to 8 bytes and in memory otherwise. Return all
6944 vector floats in memory like structure and unions; note that
6945 they always have BLKmode like the latter. */
6946 return (TYPE_MODE (type) == BLKmode
6947 || TYPE_MODE (type) == TFmode
6948 || (TREE_CODE (type) == VECTOR_TYPE
6949 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6951 /* Original SPARC 64-bit ABI says that structures and unions
6952 smaller than 32 bytes are returned in registers, as well as
6953 all other base types.
6955 Extended ABI (as implemented by the Sun compiler) says that all
6956 complex floats are returned in registers (8 FP registers at most
6957 for '_Complex long double'). Return all complex integers in
6958 registers (4 at most for '_Complex TItype').
6960 Vector ABI (as implemented by the Sun VIS SDK) says that vector
6961 integers are returned like floats of the same size, that is in
6962 registers. Return all vector floats like structure and unions;
6963 note that they always have BLKmode like the latter. */
6964 return (TYPE_MODE (type) == BLKmode
6965 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
6968 /* Handle the TARGET_STRUCT_VALUE target hook.
6969 Return where to find the structure return value address. */
6972 sparc_struct_value_rtx (tree fndecl, int incoming)
6981 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
6982 STRUCT_VALUE_OFFSET));
6984 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
6985 STRUCT_VALUE_OFFSET));
6987 /* Only follow the SPARC ABI for fixed-size structure returns.
6988 Variable size structure returns are handled per the normal
6989 procedures in GCC. This is enabled by -mstd-struct-return */
6991 && sparc_std_struct_return
6992 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
6993 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
6995 /* We must check and adjust the return address, as it is
6996 optional as to whether the return object is really
6998 rtx ret_reg = gen_rtx_REG (Pmode, 31);
6999 rtx scratch = gen_reg_rtx (SImode);
7000 rtx endlab = gen_label_rtx ();
7002 /* Calculate the return object size */
7003 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7004 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7005 /* Construct a temporary return value */
7007 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7009 /* Implement SPARC 32-bit psABI callee return struct checking:
7011 Fetch the instruction where we will return to and see if
7012 it's an unimp instruction (the most significant 10 bits
7014 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7015 plus_constant (Pmode,
7017 /* Assume the size is valid and pre-adjust */
7018 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7019 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7021 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7022 /* Write the address of the memory pointed to by temp_val into
7023 the memory pointed to by mem */
7024 emit_move_insn (mem, XEXP (temp_val, 0));
7025 emit_label (endlab);
7032 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7033 For v9, function return values are subject to the same rules as arguments,
7034 except that up to 32 bytes may be returned in registers. */
7037 sparc_function_value_1 (const_tree type, enum machine_mode mode,
7040 /* Beware that the two values are swapped here wrt function_arg. */
7041 int regbase = (outgoing
7042 ? SPARC_INCOMING_INT_ARG_FIRST
7043 : SPARC_OUTGOING_INT_ARG_FIRST);
7044 enum mode_class mclass = GET_MODE_CLASS (mode);
7047 /* Vector types deserve special treatment because they are polymorphic wrt
7048 their mode, depending upon whether VIS instructions are enabled. */
7049 if (type && TREE_CODE (type) == VECTOR_TYPE)
7051 HOST_WIDE_INT size = int_size_in_bytes (type);
7052 gcc_assert ((TARGET_ARCH32 && size <= 8)
7053 || (TARGET_ARCH64 && size <= 32));
7055 if (mode == BLKmode)
7056 return function_arg_vector_value (size,
7057 SPARC_FP_ARG_FIRST);
7059 mclass = MODE_FLOAT;
7062 if (TARGET_ARCH64 && type)
7064 /* Structures up to 32 bytes in size are returned in registers. */
7065 if (TREE_CODE (type) == RECORD_TYPE)
7067 HOST_WIDE_INT size = int_size_in_bytes (type);
7068 gcc_assert (size <= 32);
7070 return function_arg_record_value (type, mode, 0, 1, regbase);
7073 /* Unions up to 32 bytes in size are returned in integer registers. */
7074 else if (TREE_CODE (type) == UNION_TYPE)
7076 HOST_WIDE_INT size = int_size_in_bytes (type);
7077 gcc_assert (size <= 32);
7079 return function_arg_union_value (size, mode, 0, regbase);
7082 /* Objects that require it are returned in FP registers. */
7083 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7086 /* All other aggregate types are returned in an integer register in a
7087 mode corresponding to the size of the type. */
7088 else if (AGGREGATE_TYPE_P (type))
7090 /* All other aggregate types are passed in an integer register
7091 in a mode corresponding to the size of the type. */
7092 HOST_WIDE_INT size = int_size_in_bytes (type);
7093 gcc_assert (size <= 32);
7095 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7097 /* ??? We probably should have made the same ABI change in
7098 3.4.0 as the one we made for unions. The latter was
7099 required by the SCD though, while the former is not
7100 specified, so we favored compatibility and efficiency.
7102 Now we're stuck for aggregates larger than 16 bytes,
7103 because OImode vanished in the meantime. Let's not
7104 try to be unduly clever, and simply follow the ABI
7105 for unions in that case. */
7106 if (mode == BLKmode)
7107 return function_arg_union_value (size, mode, 0, regbase);
7112 /* We should only have pointer and integer types at this point. This
7113 must match sparc_promote_function_mode. */
7114 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7118 /* We should only have pointer and integer types at this point. This must
7119 match sparc_promote_function_mode. */
7120 else if (TARGET_ARCH32
7121 && mclass == MODE_INT
7122 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7125 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7126 regno = SPARC_FP_ARG_FIRST;
7130 return gen_rtx_REG (mode, regno);
7133 /* Handle TARGET_FUNCTION_VALUE.
7134 On the SPARC, the value is found in the first "output" register, but the
7135 called function leaves it in the first "input" register. */
7138 sparc_function_value (const_tree valtype,
7139 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7142 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7145 /* Handle TARGET_LIBCALL_VALUE. */
7148 sparc_libcall_value (enum machine_mode mode,
7149 const_rtx fun ATTRIBUTE_UNUSED)
7151 return sparc_function_value_1 (NULL_TREE, mode, false);
7154 /* Handle FUNCTION_VALUE_REGNO_P.
7155 On the SPARC, the first "output" reg is used for integer values, and the
7156 first floating point register is used for floating point values. */
7159 sparc_function_value_regno_p (const unsigned int regno)
7161 return (regno == 8 || regno == 32);
7164 /* Do what is necessary for `va_start'. We look at the current function
7165 to determine if stdarg or varargs is used and return the address of
7166 the first unnamed parameter. */
7169 sparc_builtin_saveregs (void)
7171 int first_reg = crtl->args.info.words;
7175 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7176 emit_move_insn (gen_rtx_MEM (word_mode,
7177 gen_rtx_PLUS (Pmode,
7179 GEN_INT (FIRST_PARM_OFFSET (0)
7182 gen_rtx_REG (word_mode,
7183 SPARC_INCOMING_INT_ARG_FIRST + regno));
7185 address = gen_rtx_PLUS (Pmode,
7187 GEN_INT (FIRST_PARM_OFFSET (0)
7188 + UNITS_PER_WORD * first_reg));
7193 /* Implement `va_start' for stdarg. */
7196 sparc_va_start (tree valist, rtx nextarg)
7198 nextarg = expand_builtin_saveregs ();
7199 std_expand_builtin_va_start (valist, nextarg);
7202 /* Implement `va_arg' for stdarg. */
7205 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7208 HOST_WIDE_INT size, rsize, align;
7211 tree ptrtype = build_pointer_type (type);
7213 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7216 size = rsize = UNITS_PER_WORD;
7222 size = int_size_in_bytes (type);
7223 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7228 /* For SPARC64, objects requiring 16-byte alignment get it. */
7229 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7230 align = 2 * UNITS_PER_WORD;
7232 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7233 are left-justified in their slots. */
7234 if (AGGREGATE_TYPE_P (type))
7237 size = rsize = UNITS_PER_WORD;
7247 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7248 incr = fold_convert (sizetype, incr);
7249 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7251 incr = fold_convert (ptr_type_node, incr);
7254 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7257 if (BYTES_BIG_ENDIAN && size < rsize)
7258 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7262 addr = fold_convert (build_pointer_type (ptrtype), addr);
7263 addr = build_va_arg_indirect_ref (addr);
7266 /* If the address isn't aligned properly for the type, we need a temporary.
7267 FIXME: This is inefficient, usually we can do this in registers. */
7268 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7270 tree tmp = create_tmp_var (type, "va_arg_tmp");
7271 tree dest_addr = build_fold_addr_expr (tmp);
7272 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7273 3, dest_addr, addr, size_int (rsize));
7274 TREE_ADDRESSABLE (tmp) = 1;
7275 gimplify_and_add (copy, pre_p);
7280 addr = fold_convert (ptrtype, addr);
7282 incr = fold_build_pointer_plus_hwi (incr, rsize);
7283 gimplify_assign (valist, incr, post_p);
7285 return build_va_arg_indirect_ref (addr);
7288 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7289 Specify whether the vector mode is supported by the hardware. */
7292 sparc_vector_mode_supported_p (enum machine_mode mode)
7294 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7297 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7299 static enum machine_mode
7300 sparc_preferred_simd_mode (enum machine_mode mode)
7318 /* Return the string to output an unconditional branch to LABEL, which is
7319 the operand number of the label.
7321 DEST is the destination insn (i.e. the label), INSN is the source. */
7324 output_ubranch (rtx dest, rtx insn)
7326 static char string[64];
7327 bool v9_form = false;
7331 /* Even if we are trying to use cbcond for this, evaluate
7332 whether we can use V9 branches as our backup plan. */
7335 if (INSN_ADDRESSES_SET_P ())
7336 delta = (INSN_ADDRESSES (INSN_UID (dest))
7337 - INSN_ADDRESSES (INSN_UID (insn)));
7339 /* Leave some instructions for "slop". */
7340 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7345 bool emit_nop = emit_cbcond_nop (insn);
7349 if (delta < -500 || delta > 500)
7355 rval = "ba,a,pt\t%%xcc, %l0";
7362 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7364 rval = "cwbe\t%%g0, %%g0, %l0";
7370 strcpy (string, "ba%*,pt\t%%xcc, ");
7372 strcpy (string, "b%*\t");
7374 p = strchr (string, '\0');
7385 /* Return the string to output a conditional branch to LABEL, which is
7386 the operand number of the label. OP is the conditional expression.
7387 XEXP (OP, 0) is assumed to be a condition code register (integer or
7388 floating point) and its mode specifies what kind of comparison we made.
7390 DEST is the destination insn (i.e. the label), INSN is the source.
7392 REVERSED is nonzero if we should reverse the sense of the comparison.
7394 ANNUL is nonzero if we should generate an annulling branch. */
7397 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7400 static char string[64];
7401 enum rtx_code code = GET_CODE (op);
7402 rtx cc_reg = XEXP (op, 0);
7403 enum machine_mode mode = GET_MODE (cc_reg);
7404 const char *labelno, *branch;
7405 int spaces = 8, far;
7408 /* v9 branches are limited to +-1MB. If it is too far away,
7421 fbne,a,pn %fcc2, .LC29
7429 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7432 /* Reversal of FP compares takes care -- an ordered compare
7433 becomes an unordered compare and vice versa. */
7434 if (mode == CCFPmode || mode == CCFPEmode)
7435 code = reverse_condition_maybe_unordered (code);
7437 code = reverse_condition (code);
7440 /* Start by writing the branch condition. */
7441 if (mode == CCFPmode || mode == CCFPEmode)
7492 /* ??? !v9: FP branches cannot be preceded by another floating point
7493 insn. Because there is currently no concept of pre-delay slots,
7494 we can fix this only by always emitting a nop before a floating
7499 strcpy (string, "nop\n\t");
7500 strcat (string, branch);
7513 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7525 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7546 strcpy (string, branch);
7548 spaces -= strlen (branch);
7549 p = strchr (string, '\0');
7551 /* Now add the annulling, the label, and a possible noop. */
7564 if (! far && insn && INSN_ADDRESSES_SET_P ())
7566 int delta = (INSN_ADDRESSES (INSN_UID (dest))
7567 - INSN_ADDRESSES (INSN_UID (insn)));
7568 /* Leave some instructions for "slop". */
7569 if (delta < -260000 || delta >= 260000)
7573 if (mode == CCFPmode || mode == CCFPEmode)
7575 static char v9_fcc_labelno[] = "%%fccX, ";
7576 /* Set the char indicating the number of the fcc reg to use. */
7577 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7578 labelno = v9_fcc_labelno;
7581 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7585 else if (mode == CCXmode || mode == CCX_NOOVmode)
7587 labelno = "%%xcc, ";
7592 labelno = "%%icc, ";
7597 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7600 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
7613 strcpy (p, labelno);
7614 p = strchr (p, '\0');
7617 strcpy (p, ".+12\n\t nop\n\tb\t");
7618 /* Skip the next insn if requested or
7619 if we know that it will be a nop. */
7620 if (annul || ! final_sequence)
7634 /* Emit a library call comparison between floating point X and Y.
7635 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7636 Return the new operator to be used in the comparison sequence.
7638 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7639 values as arguments instead of the TFmode registers themselves,
7640 that's why we cannot call emit_float_lib_cmp. */
7643 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7646 rtx slot0, slot1, result, tem, tem2, libfunc;
7647 enum machine_mode mode;
7648 enum rtx_code new_comparison;
7653 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7657 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7661 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7665 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7669 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7673 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7684 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7695 tree expr = MEM_EXPR (x);
7697 mark_addressable (expr);
7702 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7703 emit_move_insn (slot0, x);
7708 tree expr = MEM_EXPR (y);
7710 mark_addressable (expr);
7715 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7716 emit_move_insn (slot1, y);
7719 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7720 emit_library_call (libfunc, LCT_NORMAL,
7722 XEXP (slot0, 0), Pmode,
7723 XEXP (slot1, 0), Pmode);
7728 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7729 emit_library_call (libfunc, LCT_NORMAL,
7731 x, TFmode, y, TFmode);
7736 /* Immediately move the result of the libcall into a pseudo
7737 register so reload doesn't clobber the value if it needs
7738 the return register for a spill reg. */
7739 result = gen_reg_rtx (mode);
7740 emit_move_insn (result, hard_libcall_value (mode, libfunc));
7745 return gen_rtx_NE (VOIDmode, result, const0_rtx);
7748 new_comparison = (comparison == UNORDERED ? EQ : NE);
7749 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7752 new_comparison = (comparison == UNGT ? GT : NE);
7753 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7755 return gen_rtx_NE (VOIDmode, result, const2_rtx);
7757 tem = gen_reg_rtx (mode);
7759 emit_insn (gen_andsi3 (tem, result, const1_rtx));
7761 emit_insn (gen_anddi3 (tem, result, const1_rtx));
7762 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
7765 tem = gen_reg_rtx (mode);
7767 emit_insn (gen_addsi3 (tem, result, const1_rtx));
7769 emit_insn (gen_adddi3 (tem, result, const1_rtx));
7770 tem2 = gen_reg_rtx (mode);
7772 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
7774 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
7775 new_comparison = (comparison == UNEQ ? EQ : NE);
7776 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
7782 /* Generate an unsigned DImode to FP conversion. This is the same code
7783 optabs would emit if we didn't have TFmode patterns. */
7786 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
7788 rtx neglab, donelab, i0, i1, f0, in, out;
7791 in = force_reg (DImode, operands[1]);
7792 neglab = gen_label_rtx ();
7793 donelab = gen_label_rtx ();
7794 i0 = gen_reg_rtx (DImode);
7795 i1 = gen_reg_rtx (DImode);
7796 f0 = gen_reg_rtx (mode);
7798 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
7800 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
7801 emit_jump_insn (gen_jump (donelab));
7804 emit_label (neglab);
7806 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
7807 emit_insn (gen_anddi3 (i1, in, const1_rtx));
7808 emit_insn (gen_iordi3 (i0, i0, i1));
7809 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
7810 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
7812 emit_label (donelab);
7815 /* Generate an FP to unsigned DImode conversion. This is the same code
7816 optabs would emit if we didn't have TFmode patterns. */
7819 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
7821 rtx neglab, donelab, i0, i1, f0, in, out, limit;
7824 in = force_reg (mode, operands[1]);
7825 neglab = gen_label_rtx ();
7826 donelab = gen_label_rtx ();
7827 i0 = gen_reg_rtx (DImode);
7828 i1 = gen_reg_rtx (DImode);
7829 limit = gen_reg_rtx (mode);
7830 f0 = gen_reg_rtx (mode);
7832 emit_move_insn (limit,
7833 CONST_DOUBLE_FROM_REAL_VALUE (
7834 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
7835 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
7837 emit_insn (gen_rtx_SET (VOIDmode,
7839 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
7840 emit_jump_insn (gen_jump (donelab));
7843 emit_label (neglab);
7845 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
7846 emit_insn (gen_rtx_SET (VOIDmode,
7848 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
7849 emit_insn (gen_movdi (i1, const1_rtx));
7850 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
7851 emit_insn (gen_xordi3 (out, i0, i1));
7853 emit_label (donelab);
7856 /* Return the string to output a compare and branch instruction to DEST.
7857 DEST is the destination insn (i.e. the label), INSN is the source,
7858 and OP is the conditional expression. */
7861 output_cbcond (rtx op, rtx dest, rtx insn)
7863 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7864 enum rtx_code code = GET_CODE (op);
7865 const char *cond_str, *tmpl;
7866 int far, emit_nop, len;
7867 static char string[64];
7870 /* Compare and Branch is limited to +-2KB. If it is too far away,
7882 len = get_attr_length (insn);
7885 emit_nop = len == 2;
7888 code = reverse_condition (code);
7890 size_char = ((mode == SImode) ? 'w' : 'x');
7903 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7918 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7946 int veryfar = 1, delta;
7948 if (INSN_ADDRESSES_SET_P ())
7950 delta = (INSN_ADDRESSES (INSN_UID (dest))
7951 - INSN_ADDRESSES (INSN_UID (insn)));
7952 /* Leave some instructions for "slop". */
7953 if (delta >= -260000 && delta < 260000)
7958 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
7960 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
7965 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
7967 tmpl = "c%cb%s\t%%1, %%2, %%3";
7970 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
7975 /* Return the string to output a conditional branch to LABEL, testing
7976 register REG. LABEL is the operand number of the label; REG is the
7977 operand number of the reg. OP is the conditional expression. The mode
7978 of REG says what kind of comparison we made.
7980 DEST is the destination insn (i.e. the label), INSN is the source.
7982 REVERSED is nonzero if we should reverse the sense of the comparison.
7984 ANNUL is nonzero if we should generate an annulling branch. */
7987 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
7988 int annul, rtx insn)
7990 static char string[64];
7991 enum rtx_code code = GET_CODE (op);
7992 enum machine_mode mode = GET_MODE (XEXP (op, 0));
7997 /* branch on register are limited to +-128KB. If it is too far away,
8010 brgez,a,pn %o1, .LC29
8016 ba,pt %xcc, .LC29 */
8018 far = get_attr_length (insn) >= 3;
8020 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8022 code = reverse_condition (code);
8024 /* Only 64 bit versions of these instructions exist. */
8025 gcc_assert (mode == DImode);
8027 /* Start by writing the branch condition. */
8032 strcpy (string, "brnz");
8036 strcpy (string, "brz");
8040 strcpy (string, "brgez");
8044 strcpy (string, "brlz");
8048 strcpy (string, "brlez");
8052 strcpy (string, "brgz");
8059 p = strchr (string, '\0');
8061 /* Now add the annulling, reg, label, and nop. */
8068 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8071 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
8076 *p = p < string + 8 ? '\t' : ' ';
8084 int veryfar = 1, delta;
8086 if (INSN_ADDRESSES_SET_P ())
8088 delta = (INSN_ADDRESSES (INSN_UID (dest))
8089 - INSN_ADDRESSES (INSN_UID (insn)));
8090 /* Leave some instructions for "slop". */
8091 if (delta >= -260000 && delta < 260000)
8095 strcpy (p, ".+12\n\t nop\n\t");
8096 /* Skip the next insn if requested or
8097 if we know that it will be a nop. */
8098 if (annul || ! final_sequence)
8108 strcpy (p, "ba,pt\t%%xcc, ");
8122 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8123 Such instructions cannot be used in the delay slot of return insn on v9.
8124 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8128 epilogue_renumber (register rtx *where, int test)
8130 register const char *fmt;
8132 register enum rtx_code code;
8137 code = GET_CODE (*where);
8142 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8144 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8145 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8153 /* Do not replace the frame pointer with the stack pointer because
8154 it can cause the delayed instruction to load below the stack.
8155 This occurs when instructions like:
8157 (set (reg/i:SI 24 %i0)
8158 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8159 (const_int -20 [0xffffffec])) 0))
8161 are in the return delayed slot. */
8163 if (GET_CODE (XEXP (*where, 0)) == REG
8164 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8165 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8166 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8171 if (SPARC_STACK_BIAS
8172 && GET_CODE (XEXP (*where, 0)) == REG
8173 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8181 fmt = GET_RTX_FORMAT (code);
8183 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8188 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8189 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8192 else if (fmt[i] == 'e'
8193 && epilogue_renumber (&(XEXP (*where, i)), test))
8199 /* Leaf functions and non-leaf functions have different needs. */
8202 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8205 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8207 static const int *const reg_alloc_orders[] = {
8208 reg_leaf_alloc_order,
8209 reg_nonleaf_alloc_order};
8212 order_regs_for_local_alloc (void)
8214 static int last_order_nonleaf = 1;
8216 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8218 last_order_nonleaf = !last_order_nonleaf;
8219 memcpy ((char *) reg_alloc_order,
8220 (const char *) reg_alloc_orders[last_order_nonleaf],
8221 FIRST_PSEUDO_REGISTER * sizeof (int));
8225 /* Return 1 if REG and MEM are legitimate enough to allow the various
8226 mem<-->reg splits to be run. */
8229 sparc_splitdi_legitimate (rtx reg, rtx mem)
8231 /* Punt if we are here by mistake. */
8232 gcc_assert (reload_completed);
8234 /* We must have an offsettable memory reference. */
8235 if (! offsettable_memref_p (mem))
8238 /* If we have legitimate args for ldd/std, we do not want
8239 the split to happen. */
8240 if ((REGNO (reg) % 2) == 0
8241 && mem_min_alignment (mem, 8))
8248 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8251 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8255 if (GET_CODE (reg1) == SUBREG)
8256 reg1 = SUBREG_REG (reg1);
8257 if (GET_CODE (reg1) != REG)
8259 regno1 = REGNO (reg1);
8261 if (GET_CODE (reg2) == SUBREG)
8262 reg2 = SUBREG_REG (reg2);
8263 if (GET_CODE (reg2) != REG)
8265 regno2 = REGNO (reg2);
8267 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8272 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8273 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8280 /* Return 1 if x and y are some kind of REG and they refer to
8281 different hard registers. This test is guaranteed to be
8282 run after reload. */
8285 sparc_absnegfloat_split_legitimate (rtx x, rtx y)
8287 if (GET_CODE (x) != REG)
8289 if (GET_CODE (y) != REG)
8291 if (REGNO (x) == REGNO (y))
8296 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8297 This makes them candidates for using ldd and std insns.
8299 Note reg1 and reg2 *must* be hard registers. */
8302 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8304 /* We might have been passed a SUBREG. */
8305 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8308 if (REGNO (reg1) % 2 != 0)
8311 /* Integer ldd is deprecated in SPARC V9 */
8312 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8315 return (REGNO (reg1) == REGNO (reg2) - 1);
8318 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8321 This can only happen when addr1 and addr2, the addresses in mem1
8322 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8323 addr1 must also be aligned on a 64-bit boundary.
8325 Also iff dependent_reg_rtx is not null it should not be used to
8326 compute the address for mem1, i.e. we cannot optimize a sequence
8338 But, note that the transformation from:
8343 is perfectly fine. Thus, the peephole2 patterns always pass us
8344 the destination register of the first load, never the second one.
8346 For stores we don't have a similar problem, so dependent_reg_rtx is
8350 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8354 HOST_WIDE_INT offset1;
8356 /* The mems cannot be volatile. */
8357 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8360 /* MEM1 should be aligned on a 64-bit boundary. */
8361 if (MEM_ALIGN (mem1) < 64)
8364 addr1 = XEXP (mem1, 0);
8365 addr2 = XEXP (mem2, 0);
8367 /* Extract a register number and offset (if used) from the first addr. */
8368 if (GET_CODE (addr1) == PLUS)
8370 /* If not a REG, return zero. */
8371 if (GET_CODE (XEXP (addr1, 0)) != REG)
8375 reg1 = REGNO (XEXP (addr1, 0));
8376 /* The offset must be constant! */
8377 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8379 offset1 = INTVAL (XEXP (addr1, 1));
8382 else if (GET_CODE (addr1) != REG)
8386 reg1 = REGNO (addr1);
8387 /* This was a simple (mem (reg)) expression. Offset is 0. */
8391 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8392 if (GET_CODE (addr2) != PLUS)
8395 if (GET_CODE (XEXP (addr2, 0)) != REG
8396 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8399 if (reg1 != REGNO (XEXP (addr2, 0)))
8402 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8405 /* The first offset must be evenly divisible by 8 to ensure the
8406 address is 64 bit aligned. */
8407 if (offset1 % 8 != 0)
8410 /* The offset for the second addr must be 4 more than the first addr. */
8411 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8414 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8419 /* Return 1 if reg is a pseudo, or is the first register in
8420 a hard register pair. This makes it suitable for use in
8421 ldd and std insns. */
8424 register_ok_for_ldd (rtx reg)
8426 /* We might have been passed a SUBREG. */
8430 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8431 return (REGNO (reg) % 2 == 0);
8436 /* Return 1 if OP, a MEM, has an address which is known to be
8437 aligned to an 8-byte boundary. */
8440 memory_ok_for_ldd (rtx op)
8442 /* In 64-bit mode, we assume that the address is word-aligned. */
8443 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8446 if (! can_create_pseudo_p ()
8447 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8453 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8456 sparc_print_operand_punct_valid_p (unsigned char code)
8469 /* Implement TARGET_PRINT_OPERAND.
8470 Print operand X (an rtx) in assembler syntax to file FILE.
8471 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8472 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8475 sparc_print_operand (FILE *file, rtx x, int code)
8480 /* Output an insn in a delay slot. */
8482 sparc_indent_opcode = 1;
8484 fputs ("\n\t nop", file);
8487 /* Output an annul flag if there's nothing for the delay slot and we
8488 are optimizing. This is always used with '(' below.
8489 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8490 this is a dbx bug. So, we only do this when optimizing.
8491 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8492 Always emit a nop in case the next instruction is a branch. */
8493 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8497 /* Output a 'nop' if there's nothing for the delay slot and we are
8498 not optimizing. This is always used with '*' above. */
8499 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8500 fputs ("\n\t nop", file);
8501 else if (final_sequence)
8502 sparc_indent_opcode = 1;
8505 /* Output the right displacement from the saved PC on function return.
8506 The caller may have placed an "unimp" insn immediately after the call
8507 so we have to account for it. This insn is used in the 32-bit ABI
8508 when calling a function that returns a non zero-sized structure. The
8509 64-bit ABI doesn't have it. Be careful to have this test be the same
8510 as that for the call. The exception is when sparc_std_struct_return
8511 is enabled, the psABI is followed exactly and the adjustment is made
8512 by the code in sparc_struct_value_rtx. The call emitted is the same
8513 when sparc_std_struct_return is enabled. */
8515 && cfun->returns_struct
8516 && !sparc_std_struct_return
8517 && DECL_SIZE (DECL_RESULT (current_function_decl))
8518 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8520 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8526 /* Output the Embedded Medium/Anywhere code model base register. */
8527 fputs (EMBMEDANY_BASE_REG, file);
8530 /* Print some local dynamic TLS name. */
8531 assemble_name (file, get_some_local_dynamic_name ());
8535 /* Adjust the operand to take into account a RESTORE operation. */
8536 if (GET_CODE (x) == CONST_INT)
8538 else if (GET_CODE (x) != REG)
8539 output_operand_lossage ("invalid %%Y operand");
8540 else if (REGNO (x) < 8)
8541 fputs (reg_names[REGNO (x)], file);
8542 else if (REGNO (x) >= 24 && REGNO (x) < 32)
8543 fputs (reg_names[REGNO (x)-16], file);
8545 output_operand_lossage ("invalid %%Y operand");
8548 /* Print out the low order register name of a register pair. */
8549 if (WORDS_BIG_ENDIAN)
8550 fputs (reg_names[REGNO (x)+1], file);
8552 fputs (reg_names[REGNO (x)], file);
8555 /* Print out the high order register name of a register pair. */
8556 if (WORDS_BIG_ENDIAN)
8557 fputs (reg_names[REGNO (x)], file);
8559 fputs (reg_names[REGNO (x)+1], file);
8562 /* Print out the second register name of a register pair or quad.
8563 I.e., R (%o0) => %o1. */
8564 fputs (reg_names[REGNO (x)+1], file);
8567 /* Print out the third register name of a register quad.
8568 I.e., S (%o0) => %o2. */
8569 fputs (reg_names[REGNO (x)+2], file);
8572 /* Print out the fourth register name of a register quad.
8573 I.e., T (%o0) => %o3. */
8574 fputs (reg_names[REGNO (x)+3], file);
8577 /* Print a condition code register. */
8578 if (REGNO (x) == SPARC_ICC_REG)
8580 /* We don't handle CC[X]_NOOVmode because they're not supposed
8582 if (GET_MODE (x) == CCmode)
8583 fputs ("%icc", file);
8584 else if (GET_MODE (x) == CCXmode)
8585 fputs ("%xcc", file);
8590 /* %fccN register */
8591 fputs (reg_names[REGNO (x)], file);
8594 /* Print the operand's address only. */
8595 output_address (XEXP (x, 0));
8598 /* In this case we need a register. Use %g0 if the
8599 operand is const0_rtx. */
8601 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8603 fputs ("%g0", file);
8610 switch (GET_CODE (x))
8612 case IOR: fputs ("or", file); break;
8613 case AND: fputs ("and", file); break;
8614 case XOR: fputs ("xor", file); break;
8615 default: output_operand_lossage ("invalid %%A operand");
8620 switch (GET_CODE (x))
8622 case IOR: fputs ("orn", file); break;
8623 case AND: fputs ("andn", file); break;
8624 case XOR: fputs ("xnor", file); break;
8625 default: output_operand_lossage ("invalid %%B operand");
8629 /* This is used by the conditional move instructions. */
8632 enum rtx_code rc = GET_CODE (x);
8636 case NE: fputs ("ne", file); break;
8637 case EQ: fputs ("e", file); break;
8638 case GE: fputs ("ge", file); break;
8639 case GT: fputs ("g", file); break;
8640 case LE: fputs ("le", file); break;
8641 case LT: fputs ("l", file); break;
8642 case GEU: fputs ("geu", file); break;
8643 case GTU: fputs ("gu", file); break;
8644 case LEU: fputs ("leu", file); break;
8645 case LTU: fputs ("lu", file); break;
8646 case LTGT: fputs ("lg", file); break;
8647 case UNORDERED: fputs ("u", file); break;
8648 case ORDERED: fputs ("o", file); break;
8649 case UNLT: fputs ("ul", file); break;
8650 case UNLE: fputs ("ule", file); break;
8651 case UNGT: fputs ("ug", file); break;
8652 case UNGE: fputs ("uge", file); break;
8653 case UNEQ: fputs ("ue", file); break;
8654 default: output_operand_lossage ("invalid %%C operand");
8659 /* This are used by the movr instruction pattern. */
8662 enum rtx_code rc = GET_CODE (x);
8665 case NE: fputs ("ne", file); break;
8666 case EQ: fputs ("e", file); break;
8667 case GE: fputs ("gez", file); break;
8668 case LT: fputs ("lz", file); break;
8669 case LE: fputs ("lez", file); break;
8670 case GT: fputs ("gz", file); break;
8671 default: output_operand_lossage ("invalid %%D operand");
8678 /* Print a sign-extended character. */
8679 int i = trunc_int_for_mode (INTVAL (x), QImode);
8680 fprintf (file, "%d", i);
8685 /* Operand must be a MEM; write its address. */
8686 if (GET_CODE (x) != MEM)
8687 output_operand_lossage ("invalid %%f operand");
8688 output_address (XEXP (x, 0));
8693 /* Print a sign-extended 32-bit value. */
8695 if (GET_CODE(x) == CONST_INT)
8697 else if (GET_CODE(x) == CONST_DOUBLE)
8698 i = CONST_DOUBLE_LOW (x);
8701 output_operand_lossage ("invalid %%s operand");
8704 i = trunc_int_for_mode (i, SImode);
8705 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8710 /* Do nothing special. */
8714 /* Undocumented flag. */
8715 output_operand_lossage ("invalid operand output code");
8718 if (GET_CODE (x) == REG)
8719 fputs (reg_names[REGNO (x)], file);
8720 else if (GET_CODE (x) == MEM)
8723 /* Poor Sun assembler doesn't understand absolute addressing. */
8724 if (CONSTANT_P (XEXP (x, 0)))
8725 fputs ("%g0+", file);
8726 output_address (XEXP (x, 0));
8729 else if (GET_CODE (x) == HIGH)
8731 fputs ("%hi(", file);
8732 output_addr_const (file, XEXP (x, 0));
8735 else if (GET_CODE (x) == LO_SUM)
8737 sparc_print_operand (file, XEXP (x, 0), 0);
8738 if (TARGET_CM_MEDMID)
8739 fputs ("+%l44(", file);
8741 fputs ("+%lo(", file);
8742 output_addr_const (file, XEXP (x, 1));
8745 else if (GET_CODE (x) == CONST_DOUBLE
8746 && (GET_MODE (x) == VOIDmode
8747 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
8749 if (CONST_DOUBLE_HIGH (x) == 0)
8750 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
8751 else if (CONST_DOUBLE_HIGH (x) == -1
8752 && CONST_DOUBLE_LOW (x) < 0)
8753 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
8755 output_operand_lossage ("long long constant not a valid immediate operand");
8757 else if (GET_CODE (x) == CONST_DOUBLE)
8758 output_operand_lossage ("floating point constant not a valid immediate operand");
8759 else { output_addr_const (file, x); }
8762 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8765 sparc_print_operand_address (FILE *file, rtx x)
8767 register rtx base, index = 0;
8769 register rtx addr = x;
8772 fputs (reg_names[REGNO (addr)], file);
8773 else if (GET_CODE (addr) == PLUS)
8775 if (CONST_INT_P (XEXP (addr, 0)))
8776 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
8777 else if (CONST_INT_P (XEXP (addr, 1)))
8778 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
8780 base = XEXP (addr, 0), index = XEXP (addr, 1);
8781 if (GET_CODE (base) == LO_SUM)
8783 gcc_assert (USE_AS_OFFSETABLE_LO10
8785 && ! TARGET_CM_MEDMID);
8786 output_operand (XEXP (base, 0), 0);
8787 fputs ("+%lo(", file);
8788 output_address (XEXP (base, 1));
8789 fprintf (file, ")+%d", offset);
8793 fputs (reg_names[REGNO (base)], file);
8795 fprintf (file, "%+d", offset);
8796 else if (REG_P (index))
8797 fprintf (file, "+%s", reg_names[REGNO (index)]);
8798 else if (GET_CODE (index) == SYMBOL_REF
8799 || GET_CODE (index) == LABEL_REF
8800 || GET_CODE (index) == CONST)
8801 fputc ('+', file), output_addr_const (file, index);
8802 else gcc_unreachable ();
8805 else if (GET_CODE (addr) == MINUS
8806 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
8808 output_addr_const (file, XEXP (addr, 0));
8810 output_addr_const (file, XEXP (addr, 1));
8811 fputs ("-.)", file);
8813 else if (GET_CODE (addr) == LO_SUM)
8815 output_operand (XEXP (addr, 0), 0);
8816 if (TARGET_CM_MEDMID)
8817 fputs ("+%l44(", file);
8819 fputs ("+%lo(", file);
8820 output_address (XEXP (addr, 1));
8824 && GET_CODE (addr) == CONST
8825 && GET_CODE (XEXP (addr, 0)) == MINUS
8826 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
8827 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
8828 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
8830 addr = XEXP (addr, 0);
8831 output_addr_const (file, XEXP (addr, 0));
8832 /* Group the args of the second CONST in parenthesis. */
8834 /* Skip past the second CONST--it does nothing for us. */
8835 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
8836 /* Close the parenthesis. */
8841 output_addr_const (file, addr);
8845 /* Target hook for assembling integer objects. The sparc version has
8846 special handling for aligned DI-mode objects. */
8849 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
8851 /* ??? We only output .xword's for symbols and only then in environments
8852 where the assembler can handle them. */
8853 if (aligned_p && size == 8
8854 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
8858 assemble_integer_with_op ("\t.xword\t", x);
8863 assemble_aligned_integer (4, const0_rtx);
8864 assemble_aligned_integer (4, x);
8868 return default_assemble_integer (x, size, aligned_p);
8871 /* Return the value of a code used in the .proc pseudo-op that says
8872 what kind of result this function returns. For non-C types, we pick
8873 the closest C type. */
8875 #ifndef SHORT_TYPE_SIZE
8876 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
8879 #ifndef INT_TYPE_SIZE
8880 #define INT_TYPE_SIZE BITS_PER_WORD
8883 #ifndef LONG_TYPE_SIZE
8884 #define LONG_TYPE_SIZE BITS_PER_WORD
8887 #ifndef LONG_LONG_TYPE_SIZE
8888 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
8891 #ifndef FLOAT_TYPE_SIZE
8892 #define FLOAT_TYPE_SIZE BITS_PER_WORD
8895 #ifndef DOUBLE_TYPE_SIZE
8896 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8899 #ifndef LONG_DOUBLE_TYPE_SIZE
8900 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
8904 sparc_type_code (register tree type)
8906 register unsigned long qualifiers = 0;
8907 register unsigned shift;
8909 /* Only the first 30 bits of the qualifier are valid. We must refrain from
8910 setting more, since some assemblers will give an error for this. Also,
8911 we must be careful to avoid shifts of 32 bits or more to avoid getting
8912 unpredictable results. */
8914 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
8916 switch (TREE_CODE (type))
8922 qualifiers |= (3 << shift);
8927 qualifiers |= (2 << shift);
8931 case REFERENCE_TYPE:
8933 qualifiers |= (1 << shift);
8937 return (qualifiers | 8);
8940 case QUAL_UNION_TYPE:
8941 return (qualifiers | 9);
8944 return (qualifiers | 10);
8947 return (qualifiers | 16);
8950 /* If this is a range type, consider it to be the underlying
8952 if (TREE_TYPE (type) != 0)
8955 /* Carefully distinguish all the standard types of C,
8956 without messing up if the language is not C. We do this by
8957 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
8958 look at both the names and the above fields, but that's redundant.
8959 Any type whose size is between two C types will be considered
8960 to be the wider of the two types. Also, we do not have a
8961 special code to use for "long long", so anything wider than
8962 long is treated the same. Note that we can't distinguish
8963 between "int" and "long" in this code if they are the same
8964 size, but that's fine, since neither can the assembler. */
8966 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
8967 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
8969 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
8970 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
8972 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
8973 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
8976 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
8979 /* If this is a range type, consider it to be the underlying
8981 if (TREE_TYPE (type) != 0)
8984 /* Carefully distinguish all the standard types of C,
8985 without messing up if the language is not C. */
8987 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
8988 return (qualifiers | 6);
8991 return (qualifiers | 7);
8993 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
8994 /* ??? We need to distinguish between double and float complex types,
8995 but I don't know how yet because I can't reach this code from
8996 existing front-ends. */
8997 return (qualifiers | 7); /* Who knows? */
9000 case BOOLEAN_TYPE: /* Boolean truth value type. */
9006 gcc_unreachable (); /* Not a type! */
9013 /* Nested function support. */
9015 /* Emit RTL insns to initialize the variable parts of a trampoline.
9016 FNADDR is an RTX for the address of the function's pure code.
9017 CXT is an RTX for the static chain value for the function.
9019 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9020 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9021 (to store insns). This is a bit excessive. Perhaps a different
9022 mechanism would be better here.
9024 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9027 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9029 /* SPARC 32-bit trampoline:
9032 sethi %hi(static), %g2
9034 or %g2, %lo(static), %g2
9036 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9037 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9041 (adjust_address (m_tramp, SImode, 0),
9042 expand_binop (SImode, ior_optab,
9043 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9044 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9045 NULL_RTX, 1, OPTAB_DIRECT));
9048 (adjust_address (m_tramp, SImode, 4),
9049 expand_binop (SImode, ior_optab,
9050 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9051 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9052 NULL_RTX, 1, OPTAB_DIRECT));
9055 (adjust_address (m_tramp, SImode, 8),
9056 expand_binop (SImode, ior_optab,
9057 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9058 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9059 NULL_RTX, 1, OPTAB_DIRECT));
9062 (adjust_address (m_tramp, SImode, 12),
9063 expand_binop (SImode, ior_optab,
9064 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9065 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9066 NULL_RTX, 1, OPTAB_DIRECT));
9068 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9069 aligned on a 16 byte boundary so one flush clears it all. */
9070 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
9071 if (sparc_cpu != PROCESSOR_ULTRASPARC
9072 && sparc_cpu != PROCESSOR_ULTRASPARC3
9073 && sparc_cpu != PROCESSOR_NIAGARA
9074 && sparc_cpu != PROCESSOR_NIAGARA2
9075 && sparc_cpu != PROCESSOR_NIAGARA3
9076 && sparc_cpu != PROCESSOR_NIAGARA4)
9077 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
9079 /* Call __enable_execute_stack after writing onto the stack to make sure
9080 the stack address is accessible. */
9081 #ifdef HAVE_ENABLE_EXECUTE_STACK
9082 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9083 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9088 /* The 64-bit version is simpler because it makes more sense to load the
9089 values as "immediate" data out of the trampoline. It's also easier since
9090 we can read the PC without clobbering a register. */
9093 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9095 /* SPARC 64-bit trampoline:
9104 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9105 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9106 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9107 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9108 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9109 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9110 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9111 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9112 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9113 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9114 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9116 if (sparc_cpu != PROCESSOR_ULTRASPARC
9117 && sparc_cpu != PROCESSOR_ULTRASPARC3
9118 && sparc_cpu != PROCESSOR_NIAGARA
9119 && sparc_cpu != PROCESSOR_NIAGARA2
9120 && sparc_cpu != PROCESSOR_NIAGARA3
9121 && sparc_cpu != PROCESSOR_NIAGARA4)
9122 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9124 /* Call __enable_execute_stack after writing onto the stack to make sure
9125 the stack address is accessible. */
9126 #ifdef HAVE_ENABLE_EXECUTE_STACK
9127 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9128 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9132 /* Worker for TARGET_TRAMPOLINE_INIT. */
9135 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9137 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9138 cxt = force_reg (Pmode, cxt);
9140 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9142 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9145 /* Adjust the cost of a scheduling dependency. Return the new cost of
9146 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9149 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9151 enum attr_type insn_type;
9153 if (! recog_memoized (insn))
9156 insn_type = get_attr_type (insn);
9158 if (REG_NOTE_KIND (link) == 0)
9160 /* Data dependency; DEP_INSN writes a register that INSN reads some
9163 /* if a load, then the dependence must be on the memory address;
9164 add an extra "cycle". Note that the cost could be two cycles
9165 if the reg was written late in an instruction group; we ca not tell
9167 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9170 /* Get the delay only if the address of the store is the dependence. */
9171 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9173 rtx pat = PATTERN(insn);
9174 rtx dep_pat = PATTERN (dep_insn);
9176 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9177 return cost; /* This should not happen! */
9179 /* The dependency between the two instructions was on the data that
9180 is being stored. Assume that this implies that the address of the
9181 store is not dependent. */
9182 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9185 return cost + 3; /* An approximation. */
9188 /* A shift instruction cannot receive its data from an instruction
9189 in the same cycle; add a one cycle penalty. */
9190 if (insn_type == TYPE_SHIFT)
9191 return cost + 3; /* Split before cascade into shift. */
9195 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9196 INSN writes some cycles later. */
9198 /* These are only significant for the fpu unit; writing a fp reg before
9199 the fpu has finished with it stalls the processor. */
9201 /* Reusing an integer register causes no problems. */
9202 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9210 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
9212 enum attr_type insn_type, dep_type;
9213 rtx pat = PATTERN(insn);
9214 rtx dep_pat = PATTERN (dep_insn);
9216 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9219 insn_type = get_attr_type (insn);
9220 dep_type = get_attr_type (dep_insn);
9222 switch (REG_NOTE_KIND (link))
9225 /* Data dependency; DEP_INSN writes a register that INSN reads some
9232 /* Get the delay iff the address of the store is the dependence. */
9233 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9236 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9243 /* If a load, then the dependence must be on the memory address. If
9244 the addresses aren't equal, then it might be a false dependency */
9245 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9247 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9248 || GET_CODE (SET_DEST (dep_pat)) != MEM
9249 || GET_CODE (SET_SRC (pat)) != MEM
9250 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9251 XEXP (SET_SRC (pat), 0)))
9259 /* Compare to branch latency is 0. There is no benefit from
9260 separating compare and branch. */
9261 if (dep_type == TYPE_COMPARE)
9263 /* Floating point compare to branch latency is less than
9264 compare to conditional move. */
9265 if (dep_type == TYPE_FPCMP)
9274 /* Anti-dependencies only penalize the fpu unit. */
9275 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9287 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
9291 case PROCESSOR_SUPERSPARC:
9292 cost = supersparc_adjust_cost (insn, link, dep, cost);
9294 case PROCESSOR_HYPERSPARC:
9295 case PROCESSOR_SPARCLITE86X:
9296 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9305 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9306 int sched_verbose ATTRIBUTE_UNUSED,
9307 int max_ready ATTRIBUTE_UNUSED)
9311 sparc_use_sched_lookahead (void)
9313 if (sparc_cpu == PROCESSOR_NIAGARA
9314 || sparc_cpu == PROCESSOR_NIAGARA2
9315 || sparc_cpu == PROCESSOR_NIAGARA3)
9317 if (sparc_cpu == PROCESSOR_NIAGARA4)
9319 if (sparc_cpu == PROCESSOR_ULTRASPARC
9320 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9322 if ((1 << sparc_cpu) &
9323 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9324 (1 << PROCESSOR_SPARCLITE86X)))
9330 sparc_issue_rate (void)
9334 case PROCESSOR_NIAGARA:
9335 case PROCESSOR_NIAGARA2:
9336 case PROCESSOR_NIAGARA3:
9339 case PROCESSOR_NIAGARA4:
9341 /* Assume V9 processors are capable of at least dual-issue. */
9343 case PROCESSOR_SUPERSPARC:
9345 case PROCESSOR_HYPERSPARC:
9346 case PROCESSOR_SPARCLITE86X:
9348 case PROCESSOR_ULTRASPARC:
9349 case PROCESSOR_ULTRASPARC3:
9355 set_extends (rtx insn)
9357 register rtx pat = PATTERN (insn);
9359 switch (GET_CODE (SET_SRC (pat)))
9361 /* Load and some shift instructions zero extend. */
9364 /* sethi clears the high bits */
9366 /* LO_SUM is used with sethi. sethi cleared the high
9367 bits and the values used with lo_sum are positive */
9369 /* Store flag stores 0 or 1 */
9379 rtx op0 = XEXP (SET_SRC (pat), 0);
9380 rtx op1 = XEXP (SET_SRC (pat), 1);
9381 if (GET_CODE (op1) == CONST_INT)
9382 return INTVAL (op1) >= 0;
9383 if (GET_CODE (op0) != REG)
9385 if (sparc_check_64 (op0, insn) == 1)
9387 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9392 rtx op0 = XEXP (SET_SRC (pat), 0);
9393 rtx op1 = XEXP (SET_SRC (pat), 1);
9394 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9396 if (GET_CODE (op1) == CONST_INT)
9397 return INTVAL (op1) >= 0;
9398 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9401 return GET_MODE (SET_SRC (pat)) == SImode;
9402 /* Positive integers leave the high bits zero. */
9404 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9406 return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9409 return - (GET_MODE (SET_SRC (pat)) == SImode);
9411 return sparc_check_64 (SET_SRC (pat), insn);
9417 /* We _ought_ to have only one kind per function, but... */
9418 static GTY(()) rtx sparc_addr_diff_list;
9419 static GTY(()) rtx sparc_addr_list;
9422 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9424 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9426 sparc_addr_diff_list
9427 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9429 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9433 sparc_output_addr_vec (rtx vec)
9435 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9436 int idx, vlen = XVECLEN (body, 0);
9438 #ifdef ASM_OUTPUT_ADDR_VEC_START
9439 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9442 #ifdef ASM_OUTPUT_CASE_LABEL
9443 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9446 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9449 for (idx = 0; idx < vlen; idx++)
9451 ASM_OUTPUT_ADDR_VEC_ELT
9452 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9455 #ifdef ASM_OUTPUT_ADDR_VEC_END
9456 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9461 sparc_output_addr_diff_vec (rtx vec)
9463 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9464 rtx base = XEXP (XEXP (body, 0), 0);
9465 int idx, vlen = XVECLEN (body, 1);
9467 #ifdef ASM_OUTPUT_ADDR_VEC_START
9468 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9471 #ifdef ASM_OUTPUT_CASE_LABEL
9472 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9475 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9478 for (idx = 0; idx < vlen; idx++)
9480 ASM_OUTPUT_ADDR_DIFF_ELT
9483 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9484 CODE_LABEL_NUMBER (base));
9487 #ifdef ASM_OUTPUT_ADDR_VEC_END
9488 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9493 sparc_output_deferred_case_vectors (void)
9498 if (sparc_addr_list == NULL_RTX
9499 && sparc_addr_diff_list == NULL_RTX)
9502 /* Align to cache line in the function's code section. */
9503 switch_to_section (current_function_section ());
9505 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9507 ASM_OUTPUT_ALIGN (asm_out_file, align);
9509 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9510 sparc_output_addr_vec (XEXP (t, 0));
9511 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9512 sparc_output_addr_diff_vec (XEXP (t, 0));
9514 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9517 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9518 unknown. Return 1 if the high bits are zero, -1 if the register is
9521 sparc_check_64 (rtx x, rtx insn)
9523 /* If a register is set only once it is safe to ignore insns this
9524 code does not know how to handle. The loop will either recognize
9525 the single set and return the correct value or fail to recognize
9530 gcc_assert (GET_CODE (x) == REG);
9532 if (GET_MODE (x) == DImode)
9533 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9535 if (flag_expensive_optimizations
9536 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9542 insn = get_last_insn_anywhere ();
9547 while ((insn = PREV_INSN (insn)))
9549 switch (GET_CODE (insn))
9562 rtx pat = PATTERN (insn);
9563 if (GET_CODE (pat) != SET)
9565 if (rtx_equal_p (x, SET_DEST (pat)))
9566 return set_extends (insn);
9567 if (y && rtx_equal_p (y, SET_DEST (pat)))
9568 return set_extends (insn);
9569 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9577 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9578 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9581 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
9583 static char asm_code[60];
9585 /* The scratch register is only required when the destination
9586 register is not a 64-bit global or out register. */
9587 if (which_alternative != 2)
9588 operands[3] = operands[0];
9590 /* We can only shift by constants <= 63. */
9591 if (GET_CODE (operands[2]) == CONST_INT)
9592 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9594 if (GET_CODE (operands[1]) == CONST_INT)
9596 output_asm_insn ("mov\t%1, %3", operands);
9600 output_asm_insn ("sllx\t%H1, 32, %3", operands);
9601 if (sparc_check_64 (operands[1], insn) <= 0)
9602 output_asm_insn ("srl\t%L1, 0, %L1", operands);
9603 output_asm_insn ("or\t%L1, %3, %3", operands);
9606 strcpy (asm_code, opcode);
9608 if (which_alternative != 2)
9609 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9612 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9615 /* Output rtl to increment the profiler label LABELNO
9616 for profiling a function entry. */
9619 sparc_profile_hook (int labelno)
9624 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9625 if (NO_PROFILE_COUNTERS)
9627 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9631 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9632 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9633 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9637 #ifdef TARGET_SOLARIS
9638 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9641 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9642 tree decl ATTRIBUTE_UNUSED)
9644 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9646 solaris_elf_asm_comdat_section (name, flags, decl);
9650 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9652 if (!(flags & SECTION_DEBUG))
9653 fputs (",#alloc", asm_out_file);
9654 if (flags & SECTION_WRITE)
9655 fputs (",#write", asm_out_file);
9656 if (flags & SECTION_TLS)
9657 fputs (",#tls", asm_out_file);
9658 if (flags & SECTION_CODE)
9659 fputs (",#execinstr", asm_out_file);
9661 /* Sun as only supports #nobits/#progbits since Solaris 10. */
9662 if (HAVE_AS_SPARC_NOBITS)
9664 if (flags & SECTION_BSS)
9665 fputs (",#nobits", asm_out_file);
9667 fputs (",#progbits", asm_out_file);
9670 fputc ('\n', asm_out_file);
9672 #endif /* TARGET_SOLARIS */
9674 /* We do not allow indirect calls to be optimized into sibling calls.
9676 We cannot use sibling calls when delayed branches are disabled
9677 because they will likely require the call delay slot to be filled.
9679 Also, on SPARC 32-bit we cannot emit a sibling call when the
9680 current function returns a structure. This is because the "unimp
9681 after call" convention would cause the callee to return to the
9682 wrong place. The generic code already disallows cases where the
9683 function being called returns a structure.
9685 It may seem strange how this last case could occur. Usually there
9686 is code after the call which jumps to epilogue code which dumps the
9687 return value into the struct return area. That ought to invalidate
9688 the sibling call right? Well, in the C++ case we can end up passing
9689 the pointer to the struct return area to a constructor (which returns
9690 void) and then nothing else happens. Such a sibling call would look
9691 valid without the added check here.
9693 VxWorks PIC PLT entries require the global pointer to be initialized
9694 on entry. We therefore can't emit sibling calls to them. */
9696 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9699 && flag_delayed_branch
9700 && (TARGET_ARCH64 || ! cfun->returns_struct)
9701 && !(TARGET_VXWORKS_RTP
9703 && !targetm.binds_local_p (decl)));
9706 /* libfunc renaming. */
9709 sparc_init_libfuncs (void)
9713 /* Use the subroutines that Sun's library provides for integer
9714 multiply and divide. The `*' prevents an underscore from
9715 being prepended by the compiler. .umul is a little faster
9717 set_optab_libfunc (smul_optab, SImode, "*.umul");
9718 set_optab_libfunc (sdiv_optab, SImode, "*.div");
9719 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9720 set_optab_libfunc (smod_optab, SImode, "*.rem");
9721 set_optab_libfunc (umod_optab, SImode, "*.urem");
9723 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
9724 set_optab_libfunc (add_optab, TFmode, "_Q_add");
9725 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9726 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9727 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9728 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9730 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
9731 is because with soft-float, the SFmode and DFmode sqrt
9732 instructions will be absent, and the compiler will notice and
9733 try to use the TFmode sqrt instruction for calls to the
9734 builtin function sqrt, but this fails. */
9736 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9738 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9739 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9740 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9741 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9742 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9743 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9745 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
9746 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
9747 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
9748 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
9750 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
9751 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
9752 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9753 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9755 if (DITF_CONVERSION_LIBFUNCS)
9757 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
9758 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
9759 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9760 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9763 if (SUN_CONVERSION_LIBFUNCS)
9765 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9766 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9767 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9768 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9773 /* In the SPARC 64bit ABI, SImode multiply and divide functions
9774 do not exist in the library. Make sure the compiler does not
9775 emit calls to them by accident. (It should always use the
9776 hardware instructions.) */
9777 set_optab_libfunc (smul_optab, SImode, 0);
9778 set_optab_libfunc (sdiv_optab, SImode, 0);
9779 set_optab_libfunc (udiv_optab, SImode, 0);
9780 set_optab_libfunc (smod_optab, SImode, 0);
9781 set_optab_libfunc (umod_optab, SImode, 0);
9783 if (SUN_INTEGER_MULTIPLY_64)
9785 set_optab_libfunc (smul_optab, DImode, "__mul64");
9786 set_optab_libfunc (sdiv_optab, DImode, "__div64");
9787 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
9788 set_optab_libfunc (smod_optab, DImode, "__rem64");
9789 set_optab_libfunc (umod_optab, DImode, "__urem64");
9792 if (SUN_CONVERSION_LIBFUNCS)
9794 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
9795 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
9796 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
9797 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
9802 static tree def_builtin(const char *name, int code, tree type)
9804 return add_builtin_function(name, type, code, BUILT_IN_MD, NULL,
9808 static tree def_builtin_const(const char *name, int code, tree type)
9810 tree t = def_builtin(name, code, type);
9813 TREE_READONLY (t) = 1;
9818 /* Implement the TARGET_INIT_BUILTINS target hook.
9819 Create builtin functions for special SPARC instructions. */
9822 sparc_init_builtins (void)
9825 sparc_vis_init_builtins ();
9828 /* Create builtin functions for VIS 1.0 instructions. */
9831 sparc_vis_init_builtins (void)
9833 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
9834 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
9835 tree v4hi = build_vector_type (intHI_type_node, 4);
9836 tree v2hi = build_vector_type (intHI_type_node, 2);
9837 tree v2si = build_vector_type (intSI_type_node, 2);
9838 tree v1si = build_vector_type (intSI_type_node, 1);
9840 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
9841 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
9842 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
9843 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
9844 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
9845 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
9846 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
9847 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
9848 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
9849 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
9850 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
9851 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
9852 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
9853 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
9854 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
9856 intDI_type_node, 0);
9857 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
9859 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
9861 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
9863 intDI_type_node, 0);
9864 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
9866 intSI_type_node, 0);
9867 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
9869 intSI_type_node, 0);
9870 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
9872 intDI_type_node, 0);
9873 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
9876 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
9879 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
9881 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
9883 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
9885 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
9887 tree void_ftype_di = build_function_type_list (void_type_node,
9888 intDI_type_node, 0);
9889 tree di_ftype_void = build_function_type_list (intDI_type_node,
9891 tree void_ftype_si = build_function_type_list (void_type_node,
9892 intSI_type_node, 0);
9893 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
9895 float_type_node, 0);
9896 tree df_ftype_df_df = build_function_type_list (double_type_node,
9898 double_type_node, 0);
9900 /* Packing and expanding vectors. */
9901 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
9903 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
9904 v8qi_ftype_v2si_v8qi);
9905 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
9907 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
9909 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
9910 v8qi_ftype_v4qi_v4qi);
9912 /* Multiplications. */
9913 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
9914 v4hi_ftype_v4qi_v4hi);
9915 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
9916 v4hi_ftype_v4qi_v2hi);
9917 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
9918 v4hi_ftype_v4qi_v2hi);
9919 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
9920 v4hi_ftype_v8qi_v4hi);
9921 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
9922 v4hi_ftype_v8qi_v4hi);
9923 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
9924 v2si_ftype_v4qi_v2hi);
9925 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
9926 v2si_ftype_v4qi_v2hi);
9928 /* Data aligning. */
9929 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
9930 v4hi_ftype_v4hi_v4hi);
9931 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
9932 v8qi_ftype_v8qi_v8qi);
9933 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
9934 v2si_ftype_v2si_v2si);
9935 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
9938 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
9940 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
9945 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
9947 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
9952 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
9954 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
9958 /* Pixel distance. */
9959 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
9960 di_ftype_v8qi_v8qi_di);
9962 /* Edge handling. */
9965 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
9967 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
9969 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
9971 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
9973 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
9975 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
9979 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
9981 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
9983 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
9985 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
9987 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
9989 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
9995 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
9997 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
9999 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10001 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10003 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10005 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10009 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10011 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10013 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10015 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10017 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10019 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10024 /* Pixel compare. */
10027 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10028 di_ftype_v4hi_v4hi);
10029 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10030 di_ftype_v2si_v2si);
10031 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10032 di_ftype_v4hi_v4hi);
10033 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10034 di_ftype_v2si_v2si);
10035 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10036 di_ftype_v4hi_v4hi);
10037 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10038 di_ftype_v2si_v2si);
10039 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10040 di_ftype_v4hi_v4hi);
10041 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10042 di_ftype_v2si_v2si);
10046 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10047 si_ftype_v4hi_v4hi);
10048 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10049 si_ftype_v2si_v2si);
10050 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10051 si_ftype_v4hi_v4hi);
10052 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10053 si_ftype_v2si_v2si);
10054 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10055 si_ftype_v4hi_v4hi);
10056 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10057 si_ftype_v2si_v2si);
10058 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10059 si_ftype_v4hi_v4hi);
10060 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10061 si_ftype_v2si_v2si);
10064 /* Addition and subtraction. */
10065 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10066 v4hi_ftype_v4hi_v4hi);
10067 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10068 v2hi_ftype_v2hi_v2hi);
10069 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10070 v2si_ftype_v2si_v2si);
10071 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10072 v1si_ftype_v1si_v1si);
10073 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10074 v4hi_ftype_v4hi_v4hi);
10075 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10076 v2hi_ftype_v2hi_v2hi);
10077 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10078 v2si_ftype_v2si_v2si);
10079 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10080 v1si_ftype_v1si_v1si);
10082 /* Three-dimensional array addressing. */
10085 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10087 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10089 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10094 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10096 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10098 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10104 /* Byte mask and shuffle */
10106 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10109 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10111 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10112 v4hi_ftype_v4hi_v4hi);
10113 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10114 v8qi_ftype_v8qi_v8qi);
10115 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10116 v2si_ftype_v2si_v2si);
10117 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10125 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10127 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10129 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10134 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10136 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10138 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10142 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10143 v4hi_ftype_v4hi_v4hi);
10145 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10146 v4hi_ftype_v4hi_v4hi);
10147 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10148 v4hi_ftype_v4hi_v4hi);
10149 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10150 v4hi_ftype_v4hi_v4hi);
10151 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10152 v4hi_ftype_v4hi_v4hi);
10153 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10154 v2si_ftype_v2si_v2si);
10155 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10156 v2si_ftype_v2si_v2si);
10157 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10158 v2si_ftype_v2si_v2si);
10159 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10160 v2si_ftype_v2si_v2si);
10163 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10164 di_ftype_v8qi_v8qi);
10166 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10167 si_ftype_v8qi_v8qi);
10169 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10170 v4hi_ftype_v4hi_v4hi);
10171 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10173 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10176 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10177 v4hi_ftype_v4hi_v4hi);
10178 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10179 v2hi_ftype_v2hi_v2hi);
10180 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10181 v4hi_ftype_v4hi_v4hi);
10182 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10183 v2hi_ftype_v2hi_v2hi);
10184 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10185 v2si_ftype_v2si_v2si);
10186 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10187 v1si_ftype_v1si_v1si);
10188 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10189 v2si_ftype_v2si_v2si);
10190 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10191 v1si_ftype_v1si_v1si);
10195 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10196 di_ftype_v8qi_v8qi);
10197 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10198 di_ftype_v8qi_v8qi);
10199 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10200 di_ftype_v8qi_v8qi);
10201 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10202 di_ftype_v8qi_v8qi);
10206 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10207 si_ftype_v8qi_v8qi);
10208 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10209 si_ftype_v8qi_v8qi);
10210 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10211 si_ftype_v8qi_v8qi);
10212 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10213 si_ftype_v8qi_v8qi);
10216 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10218 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10220 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10222 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10224 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10226 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10229 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10231 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10233 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10238 /* Handle TARGET_EXPAND_BUILTIN target hook.
10239 Expand builtin functions for sparc intrinsics. */
10242 sparc_expand_builtin (tree exp, rtx target,
10243 rtx subtarget ATTRIBUTE_UNUSED,
10244 enum machine_mode tmode ATTRIBUTE_UNUSED,
10245 int ignore ATTRIBUTE_UNUSED)
10248 call_expr_arg_iterator iter;
10249 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10250 unsigned int icode = DECL_FUNCTION_CODE (fndecl);
10255 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10259 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10261 || GET_MODE (target) != tmode
10262 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10263 op[0] = gen_reg_rtx (tmode);
10267 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10269 const struct insn_operand_data *insn_op;
10272 if (arg == error_mark_node)
10276 idx = arg_count - !nonvoid;
10277 insn_op = &insn_data[icode].operand[idx];
10278 op[arg_count] = expand_normal (arg);
10280 if (insn_op->mode == V1DImode
10281 && GET_MODE (op[arg_count]) == DImode)
10282 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10283 else if (insn_op->mode == V1SImode
10284 && GET_MODE (op[arg_count]) == SImode)
10285 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10287 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10289 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10295 pat = GEN_FCN (icode) (op[0]);
10299 pat = GEN_FCN (icode) (op[0], op[1]);
10301 pat = GEN_FCN (icode) (op[1]);
10304 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10307 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10310 gcc_unreachable ();
10325 sparc_vis_mul8x16 (int e8, int e16)
10327 return (e8 * e16 + 128) / 256;
10330 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10331 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10334 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10335 tree cst0, tree cst1)
10337 unsigned i, num = VECTOR_CST_NELTS (cst0);
10342 case CODE_FOR_fmul8x16_vis:
10343 for (i = 0; i < num; ++i)
10346 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10347 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10348 n_elts[i] = build_int_cst (inner_type, val);
10352 case CODE_FOR_fmul8x16au_vis:
10353 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10355 for (i = 0; i < num; ++i)
10358 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10360 n_elts[i] = build_int_cst (inner_type, val);
10364 case CODE_FOR_fmul8x16al_vis:
10365 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10367 for (i = 0; i < num; ++i)
10370 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10372 n_elts[i] = build_int_cst (inner_type, val);
10377 gcc_unreachable ();
10381 /* Handle TARGET_FOLD_BUILTIN target hook.
10382 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10383 result of the function call is ignored. NULL_TREE is returned if the
10384 function could not be folded. */
10387 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10388 tree *args, bool ignore)
10390 tree arg0, arg1, arg2;
10391 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10392 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10396 /* Note that a switch statement instead of the sequence of tests would
10397 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10398 and that would yield multiple alternatives with identical values. */
10399 if (icode == CODE_FOR_alignaddrsi_vis
10400 || icode == CODE_FOR_alignaddrdi_vis
10401 || icode == CODE_FOR_wrgsr_vis
10402 || icode == CODE_FOR_bmasksi_vis
10403 || icode == CODE_FOR_bmaskdi_vis
10404 || icode == CODE_FOR_cmask8si_vis
10405 || icode == CODE_FOR_cmask8di_vis
10406 || icode == CODE_FOR_cmask16si_vis
10407 || icode == CODE_FOR_cmask16di_vis
10408 || icode == CODE_FOR_cmask32si_vis
10409 || icode == CODE_FOR_cmask32di_vis)
10412 return build_zero_cst (rtype);
10417 case CODE_FOR_fexpand_vis:
10421 if (TREE_CODE (arg0) == VECTOR_CST)
10423 tree inner_type = TREE_TYPE (rtype);
10427 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10428 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10429 n_elts[i] = build_int_cst (inner_type,
10431 (VECTOR_CST_ELT (arg0, i)) << 4);
10432 return build_vector (rtype, n_elts);
10436 case CODE_FOR_fmul8x16_vis:
10437 case CODE_FOR_fmul8x16au_vis:
10438 case CODE_FOR_fmul8x16al_vis:
10444 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10446 tree inner_type = TREE_TYPE (rtype);
10447 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10448 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10449 return build_vector (rtype, n_elts);
10453 case CODE_FOR_fpmerge_vis:
10459 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10461 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10463 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10465 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10466 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10469 return build_vector (rtype, n_elts);
10473 case CODE_FOR_pdist_vis:
10481 if (TREE_CODE (arg0) == VECTOR_CST
10482 && TREE_CODE (arg1) == VECTOR_CST
10483 && TREE_CODE (arg2) == INTEGER_CST)
10485 bool overflow = false;
10486 double_int result = TREE_INT_CST (arg2);
10490 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10492 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10493 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10495 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10497 tmp = e1.neg_with_overflow (&neg1_ovf);
10498 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10499 if (tmp.is_negative ())
10500 tmp = tmp.neg_with_overflow (&neg2_ovf);
10502 result = result.add_with_sign (tmp, false, &add2_ovf);
10503 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10506 gcc_assert (!overflow);
10508 return build_int_cst_wide (rtype, result.low, result.high);
10518 /* ??? This duplicates information provided to the compiler by the
10519 ??? scheduler description. Some day, teach genautomata to output
10520 ??? the latencies and then CSE will just use that. */
10523 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10524 int *total, bool speed ATTRIBUTE_UNUSED)
10526 enum machine_mode mode = GET_MODE (x);
10527 bool float_mode_p = FLOAT_MODE_P (mode);
10532 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10550 if (GET_MODE (x) == VOIDmode
10551 && ((CONST_DOUBLE_HIGH (x) == 0
10552 && CONST_DOUBLE_LOW (x) < 0x1000)
10553 || (CONST_DOUBLE_HIGH (x) == -1
10554 && CONST_DOUBLE_LOW (x) < 0
10555 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10562 /* If outer-code was a sign or zero extension, a cost
10563 of COSTS_N_INSNS (1) was already added in. This is
10564 why we are subtracting it back out. */
10565 if (outer_code == ZERO_EXTEND)
10567 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10569 else if (outer_code == SIGN_EXTEND)
10571 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
10573 else if (float_mode_p)
10575 *total = sparc_costs->float_load;
10579 *total = sparc_costs->int_load;
10587 *total = sparc_costs->float_plusminus;
10589 *total = COSTS_N_INSNS (1);
10596 gcc_assert (float_mode_p);
10597 *total = sparc_costs->float_mul;
10600 if (GET_CODE (sub) == NEG)
10601 sub = XEXP (sub, 0);
10602 *total += rtx_cost (sub, FMA, 0, speed);
10605 if (GET_CODE (sub) == NEG)
10606 sub = XEXP (sub, 0);
10607 *total += rtx_cost (sub, FMA, 2, speed);
10613 *total = sparc_costs->float_mul;
10614 else if (! TARGET_HARD_MUL)
10615 *total = COSTS_N_INSNS (25);
10621 if (sparc_costs->int_mul_bit_factor)
10625 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
10627 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
10628 for (nbits = 0; value != 0; value &= value - 1)
10631 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
10632 && GET_MODE (XEXP (x, 1)) == VOIDmode)
10634 rtx x1 = XEXP (x, 1);
10635 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
10636 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
10638 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
10640 for (; value2 != 0; value2 &= value2 - 1)
10648 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
10649 bit_cost = COSTS_N_INSNS (bit_cost);
10652 if (mode == DImode)
10653 *total = sparc_costs->int_mulX + bit_cost;
10655 *total = sparc_costs->int_mul + bit_cost;
10662 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
10671 if (mode == DFmode)
10672 *total = sparc_costs->float_div_df;
10674 *total = sparc_costs->float_div_sf;
10678 if (mode == DImode)
10679 *total = sparc_costs->int_divX;
10681 *total = sparc_costs->int_div;
10686 if (! float_mode_p)
10688 *total = COSTS_N_INSNS (1);
10695 case UNSIGNED_FLOAT:
10699 case FLOAT_TRUNCATE:
10700 *total = sparc_costs->float_move;
10704 if (mode == DFmode)
10705 *total = sparc_costs->float_sqrt_df;
10707 *total = sparc_costs->float_sqrt_sf;
10712 *total = sparc_costs->float_cmp;
10714 *total = COSTS_N_INSNS (1);
10719 *total = sparc_costs->float_cmove;
10721 *total = sparc_costs->int_cmove;
10725 /* Handle the NAND vector patterns. */
10726 if (sparc_vector_mode_supported_p (GET_MODE (x))
10727 && GET_CODE (XEXP (x, 0)) == NOT
10728 && GET_CODE (XEXP (x, 1)) == NOT)
10730 *total = COSTS_N_INSNS (1);
10741 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10744 general_or_i64_p (reg_class_t rclass)
10746 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10749 /* Implement TARGET_REGISTER_MOVE_COST. */
10752 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10753 reg_class_t from, reg_class_t to)
10755 bool need_memory = false;
10757 if (from == FPCC_REGS || to == FPCC_REGS)
10758 need_memory = true;
10759 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10760 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10764 int size = GET_MODE_SIZE (mode);
10765 if (size == 8 || size == 4)
10767 if (! TARGET_ARCH32 || size == 4)
10773 need_memory = true;
10778 if (sparc_cpu == PROCESSOR_ULTRASPARC
10779 || sparc_cpu == PROCESSOR_ULTRASPARC3
10780 || sparc_cpu == PROCESSOR_NIAGARA
10781 || sparc_cpu == PROCESSOR_NIAGARA2
10782 || sparc_cpu == PROCESSOR_NIAGARA3
10783 || sparc_cpu == PROCESSOR_NIAGARA4)
10792 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
10793 This is achieved by means of a manual dynamic stack space allocation in
10794 the current frame. We make the assumption that SEQ doesn't contain any
10795 function calls, with the possible exception of calls to the GOT helper. */
10798 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
10800 /* We must preserve the lowest 16 words for the register save area. */
10801 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
10802 /* We really need only 2 words of fresh stack space. */
10803 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
10806 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
10807 SPARC_STACK_BIAS + offset));
10809 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
10810 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
10812 emit_insn (gen_rtx_SET (VOIDmode,
10813 adjust_address (slot, word_mode, UNITS_PER_WORD),
10817 emit_insn (gen_rtx_SET (VOIDmode,
10819 adjust_address (slot, word_mode, UNITS_PER_WORD)));
10820 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
10821 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
10824 /* Output the assembler code for a thunk function. THUNK_DECL is the
10825 declaration for the thunk function itself, FUNCTION is the decl for
10826 the target function. DELTA is an immediate constant offset to be
10827 added to THIS. If VCALL_OFFSET is nonzero, the word at address
10828 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
10831 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10832 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10835 rtx this_rtx, insn, funexp;
10836 unsigned int int_arg_first;
10838 reload_completed = 1;
10839 epilogue_completed = 1;
10841 emit_note (NOTE_INSN_PROLOGUE_END);
10845 sparc_leaf_function_p = 1;
10847 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10849 else if (flag_delayed_branch)
10851 /* We will emit a regular sibcall below, so we need to instruct
10852 output_sibcall that we are in a leaf function. */
10853 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
10855 /* This will cause final.c to invoke leaf_renumber_regs so we
10856 must behave as if we were in a not-yet-leafified function. */
10857 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
10861 /* We will emit the sibcall manually below, so we will need to
10862 manually spill non-leaf registers. */
10863 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
10865 /* We really are in a leaf function. */
10866 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
10869 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
10870 returns a structure, the structure return pointer is there instead. */
10872 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10873 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
10875 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
10877 /* Add DELTA. When possible use a plain add, otherwise load it into
10878 a register first. */
10881 rtx delta_rtx = GEN_INT (delta);
10883 if (! SPARC_SIMM13_P (delta))
10885 rtx scratch = gen_rtx_REG (Pmode, 1);
10886 emit_move_insn (scratch, delta_rtx);
10887 delta_rtx = scratch;
10890 /* THIS_RTX += DELTA. */
10891 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
10894 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
10897 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10898 rtx scratch = gen_rtx_REG (Pmode, 1);
10900 gcc_assert (vcall_offset < 0);
10902 /* SCRATCH = *THIS_RTX. */
10903 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
10905 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
10906 may not have any available scratch register at this point. */
10907 if (SPARC_SIMM13_P (vcall_offset))
10909 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
10910 else if (! fixed_regs[5]
10911 /* The below sequence is made up of at least 2 insns,
10912 while the default method may need only one. */
10913 && vcall_offset < -8192)
10915 rtx scratch2 = gen_rtx_REG (Pmode, 5);
10916 emit_move_insn (scratch2, vcall_offset_rtx);
10917 vcall_offset_rtx = scratch2;
10921 rtx increment = GEN_INT (-4096);
10923 /* VCALL_OFFSET is a negative number whose typical range can be
10924 estimated as -32768..0 in 32-bit mode. In almost all cases
10925 it is therefore cheaper to emit multiple add insns than
10926 spilling and loading the constant into a register (at least
10928 while (! SPARC_SIMM13_P (vcall_offset))
10930 emit_insn (gen_add2_insn (scratch, increment));
10931 vcall_offset += 4096;
10933 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
10936 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
10937 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
10938 gen_rtx_PLUS (Pmode,
10940 vcall_offset_rtx)));
10942 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
10943 emit_insn (gen_add2_insn (this_rtx, scratch));
10946 /* Generate a tail call to the target function. */
10947 if (! TREE_USED (function))
10949 assemble_external (function);
10950 TREE_USED (function) = 1;
10952 funexp = XEXP (DECL_RTL (function), 0);
10954 if (flag_delayed_branch)
10956 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10957 insn = emit_call_insn (gen_sibcall (funexp));
10958 SIBLING_CALL_P (insn) = 1;
10962 /* The hoops we have to jump through in order to generate a sibcall
10963 without using delay slots... */
10964 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
10968 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
10970 load_got_register (); /* clobbers %o7 */
10971 scratch = sparc_legitimize_pic_address (funexp, scratch);
10972 seq = get_insns ();
10974 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
10976 else if (TARGET_ARCH32)
10978 emit_insn (gen_rtx_SET (VOIDmode,
10980 gen_rtx_HIGH (SImode, funexp)));
10981 emit_insn (gen_rtx_SET (VOIDmode,
10983 gen_rtx_LO_SUM (SImode, scratch, funexp)));
10985 else /* TARGET_ARCH64 */
10987 switch (sparc_cmodel)
10991 /* The destination can serve as a temporary. */
10992 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
10997 /* The destination cannot serve as a temporary. */
10998 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11000 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11001 seq = get_insns ();
11003 emit_and_preserve (seq, spill_reg, 0);
11007 gcc_unreachable ();
11011 emit_jump_insn (gen_indirect_jump (scratch));
11016 /* Run just enough of rest_of_compilation to get the insns emitted.
11017 There's not really enough bulk here to make other passes such as
11018 instruction scheduling worth while. Note that use_thunk calls
11019 assemble_start_function and assemble_end_function. */
11020 insn = get_insns ();
11021 shorten_branches (insn);
11022 final_start_function (insn, file, 1);
11023 final (insn, file, 1);
11024 final_end_function ();
11026 reload_completed = 0;
11027 epilogue_completed = 0;
11030 /* Return true if sparc_output_mi_thunk would be able to output the
11031 assembler code for the thunk function specified by the arguments
11032 it is passed, and false otherwise. */
11034 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11035 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11036 HOST_WIDE_INT vcall_offset,
11037 const_tree function ATTRIBUTE_UNUSED)
11039 /* Bound the loop used in the default method above. */
11040 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11043 /* How to allocate a 'struct machine_function'. */
11045 static struct machine_function *
11046 sparc_init_machine_status (void)
11048 return ggc_alloc_cleared_machine_function ();
11051 /* Locate some local-dynamic symbol still in use by this function
11052 so that we can print its name in local-dynamic base patterns. */
11054 static const char *
11055 get_some_local_dynamic_name (void)
11059 if (cfun->machine->some_ld_name)
11060 return cfun->machine->some_ld_name;
11062 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11064 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11065 return cfun->machine->some_ld_name;
11067 gcc_unreachable ();
11071 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11076 && GET_CODE (x) == SYMBOL_REF
11077 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11079 cfun->machine->some_ld_name = XSTR (x, 0);
11086 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11087 We need to emit DTP-relative relocations. */
11090 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11095 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11098 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11101 gcc_unreachable ();
11103 output_addr_const (file, x);
11107 /* Do whatever processing is required at the end of a file. */
11110 sparc_file_end (void)
11112 /* If we need to emit the special GOT helper function, do so now. */
11113 if (got_helper_rtx)
11115 const char *name = XSTR (got_helper_rtx, 0);
11116 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11117 #ifdef DWARF2_UNWIND_INFO
11121 if (USE_HIDDEN_LINKONCE)
11123 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11124 get_identifier (name),
11125 build_function_type_list (void_type_node,
11127 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11128 NULL_TREE, void_type_node);
11129 TREE_PUBLIC (decl) = 1;
11130 TREE_STATIC (decl) = 1;
11131 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11132 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11133 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11134 resolve_unique_section (decl, 0, flag_function_sections);
11135 allocate_struct_function (decl, true);
11136 cfun->is_thunk = 1;
11137 current_function_decl = decl;
11138 init_varasm_status ();
11139 assemble_start_function (decl, name);
11143 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11144 switch_to_section (text_section);
11146 ASM_OUTPUT_ALIGN (asm_out_file, align);
11147 ASM_OUTPUT_LABEL (asm_out_file, name);
11150 #ifdef DWARF2_UNWIND_INFO
11151 do_cfi = dwarf2out_do_cfi_asm ();
11153 fprintf (asm_out_file, "\t.cfi_startproc\n");
11155 if (flag_delayed_branch)
11156 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11157 reg_name, reg_name);
11159 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11160 reg_name, reg_name);
11161 #ifdef DWARF2_UNWIND_INFO
11163 fprintf (asm_out_file, "\t.cfi_endproc\n");
11167 if (NEED_INDICATE_EXEC_STACK)
11168 file_end_indicate_exec_stack ();
11170 #ifdef TARGET_SOLARIS
11171 solaris_file_end ();
11175 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11176 /* Implement TARGET_MANGLE_TYPE. */
11178 static const char *
11179 sparc_mangle_type (const_tree type)
11182 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11183 && TARGET_LONG_DOUBLE_128)
11186 /* For all other types, use normal C++ mangling. */
11191 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11192 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
11193 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11196 sparc_emit_membar_for_model (enum memmodel model,
11197 int load_store, int before_after)
11199 /* Bits for the MEMBAR mmask field. */
11200 const int LoadLoad = 1;
11201 const int StoreLoad = 2;
11202 const int LoadStore = 4;
11203 const int StoreStore = 8;
11205 int mm = 0, implied = 0;
11207 switch (sparc_memory_model)
11210 /* Sequential Consistency. All memory transactions are immediately
11211 visible in sequential execution order. No barriers needed. */
11212 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11216 /* Total Store Ordering: all memory transactions with store semantics
11217 are followed by an implied StoreStore. */
11218 implied |= StoreStore;
11222 /* Partial Store Ordering: all memory transactions with load semantics
11223 are followed by an implied LoadLoad | LoadStore. */
11224 implied |= LoadLoad | LoadStore;
11226 /* If we're not looking for a raw barrer (before+after), then atomic
11227 operations get the benefit of being both load and store. */
11228 if (load_store == 3 && before_after == 2)
11229 implied |= StoreLoad | StoreStore;
11233 /* Relaxed Memory Ordering: no implicit bits. */
11237 gcc_unreachable ();
11240 if (before_after & 1)
11242 if (model == MEMMODEL_RELEASE
11243 || model == MEMMODEL_ACQ_REL
11244 || model == MEMMODEL_SEQ_CST)
11246 if (load_store & 1)
11247 mm |= LoadLoad | StoreLoad;
11248 if (load_store & 2)
11249 mm |= LoadStore | StoreStore;
11252 if (before_after & 2)
11254 if (model == MEMMODEL_ACQUIRE
11255 || model == MEMMODEL_ACQ_REL
11256 || model == MEMMODEL_SEQ_CST)
11258 if (load_store & 1)
11259 mm |= LoadLoad | LoadStore;
11260 if (load_store & 2)
11261 mm |= StoreLoad | StoreStore;
11265 /* Remove the bits implied by the system memory model. */
11268 /* For raw barriers (before+after), always emit a barrier.
11269 This will become a compile-time barrier if needed. */
11270 if (mm || before_after == 3)
11271 emit_insn (gen_membar (GEN_INT (mm)));
11274 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
11275 compare and swap on the word containing the byte or half-word. */
11278 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11279 rtx oldval, rtx newval)
11281 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11282 rtx addr = gen_reg_rtx (Pmode);
11283 rtx off = gen_reg_rtx (SImode);
11284 rtx oldv = gen_reg_rtx (SImode);
11285 rtx newv = gen_reg_rtx (SImode);
11286 rtx oldvalue = gen_reg_rtx (SImode);
11287 rtx newvalue = gen_reg_rtx (SImode);
11288 rtx res = gen_reg_rtx (SImode);
11289 rtx resv = gen_reg_rtx (SImode);
11290 rtx memsi, val, mask, end_label, loop_label, cc;
11292 emit_insn (gen_rtx_SET (VOIDmode, addr,
11293 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11295 if (Pmode != SImode)
11296 addr1 = gen_lowpart (SImode, addr1);
11297 emit_insn (gen_rtx_SET (VOIDmode, off,
11298 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11300 memsi = gen_rtx_MEM (SImode, addr);
11301 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11302 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11304 val = copy_to_reg (memsi);
11306 emit_insn (gen_rtx_SET (VOIDmode, off,
11307 gen_rtx_XOR (SImode, off,
11308 GEN_INT (GET_MODE (mem) == QImode
11311 emit_insn (gen_rtx_SET (VOIDmode, off,
11312 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11314 if (GET_MODE (mem) == QImode)
11315 mask = force_reg (SImode, GEN_INT (0xff));
11317 mask = force_reg (SImode, GEN_INT (0xffff));
11319 emit_insn (gen_rtx_SET (VOIDmode, mask,
11320 gen_rtx_ASHIFT (SImode, mask, off)));
11322 emit_insn (gen_rtx_SET (VOIDmode, val,
11323 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11326 oldval = gen_lowpart (SImode, oldval);
11327 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11328 gen_rtx_ASHIFT (SImode, oldval, off)));
11330 newval = gen_lowpart_common (SImode, newval);
11331 emit_insn (gen_rtx_SET (VOIDmode, newv,
11332 gen_rtx_ASHIFT (SImode, newval, off)));
11334 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11335 gen_rtx_AND (SImode, oldv, mask)));
11337 emit_insn (gen_rtx_SET (VOIDmode, newv,
11338 gen_rtx_AND (SImode, newv, mask)));
11340 end_label = gen_label_rtx ();
11341 loop_label = gen_label_rtx ();
11342 emit_label (loop_label);
11344 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11345 gen_rtx_IOR (SImode, oldv, val)));
11347 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11348 gen_rtx_IOR (SImode, newv, val)));
11350 emit_move_insn (bool_result, const1_rtx);
11352 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11354 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11356 emit_insn (gen_rtx_SET (VOIDmode, resv,
11357 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11360 emit_move_insn (bool_result, const0_rtx);
11362 cc = gen_compare_reg_1 (NE, resv, val);
11363 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11365 /* Use cbranchcc4 to separate the compare and branch! */
11366 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11367 cc, const0_rtx, loop_label));
11369 emit_label (end_label);
11371 emit_insn (gen_rtx_SET (VOIDmode, res,
11372 gen_rtx_AND (SImode, res, mask)));
11374 emit_insn (gen_rtx_SET (VOIDmode, res,
11375 gen_rtx_LSHIFTRT (SImode, res, off)));
11377 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11380 /* Expand code to perform a compare-and-swap. */
11383 sparc_expand_compare_and_swap (rtx operands[])
11385 rtx bval, retval, mem, oldval, newval;
11386 enum machine_mode mode;
11387 enum memmodel model;
11389 bval = operands[0];
11390 retval = operands[1];
11392 oldval = operands[3];
11393 newval = operands[4];
11394 model = (enum memmodel) INTVAL (operands[6]);
11395 mode = GET_MODE (mem);
11397 sparc_emit_membar_for_model (model, 3, 1);
11399 if (reg_overlap_mentioned_p (retval, oldval))
11400 oldval = copy_to_reg (oldval);
11402 if (mode == QImode || mode == HImode)
11403 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11406 rtx (*gen) (rtx, rtx, rtx, rtx);
11409 if (mode == SImode)
11410 gen = gen_atomic_compare_and_swapsi_1;
11412 gen = gen_atomic_compare_and_swapdi_1;
11413 emit_insn (gen (retval, mem, oldval, newval));
11415 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11417 convert_move (bval, x, 1);
11420 sparc_emit_membar_for_model (model, 3, 2);
11424 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11428 sel = gen_lowpart (DImode, sel);
11432 /* inp = xxxxxxxAxxxxxxxB */
11433 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11434 NULL_RTX, 1, OPTAB_DIRECT);
11435 /* t_1 = ....xxxxxxxAxxx. */
11436 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11437 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11438 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11439 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11440 /* sel = .......B */
11441 /* t_1 = ...A.... */
11442 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11443 /* sel = ...A...B */
11444 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11445 /* sel = AAAABBBB * 4 */
11446 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11447 /* sel = { A*4, A*4+1, A*4+2, ... } */
11451 /* inp = xxxAxxxBxxxCxxxD */
11452 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11453 NULL_RTX, 1, OPTAB_DIRECT);
11454 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11455 NULL_RTX, 1, OPTAB_DIRECT);
11456 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11457 NULL_RTX, 1, OPTAB_DIRECT);
11458 /* t_1 = ..xxxAxxxBxxxCxx */
11459 /* t_2 = ....xxxAxxxBxxxC */
11460 /* t_3 = ......xxxAxxxBxx */
11461 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11463 NULL_RTX, 1, OPTAB_DIRECT);
11464 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11466 NULL_RTX, 1, OPTAB_DIRECT);
11467 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11468 GEN_INT (0x070000),
11469 NULL_RTX, 1, OPTAB_DIRECT);
11470 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11471 GEN_INT (0x07000000),
11472 NULL_RTX, 1, OPTAB_DIRECT);
11473 /* sel = .......D */
11474 /* t_1 = .....C.. */
11475 /* t_2 = ...B.... */
11476 /* t_3 = .A...... */
11477 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11478 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11479 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11480 /* sel = .A.B.C.D */
11481 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11482 /* sel = AABBCCDD * 2 */
11483 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11484 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11488 /* input = xAxBxCxDxExFxGxH */
11489 sel = expand_simple_binop (DImode, AND, sel,
11490 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11492 NULL_RTX, 1, OPTAB_DIRECT);
11493 /* sel = .A.B.C.D.E.F.G.H */
11494 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11495 NULL_RTX, 1, OPTAB_DIRECT);
11496 /* t_1 = ..A.B.C.D.E.F.G. */
11497 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11498 NULL_RTX, 1, OPTAB_DIRECT);
11499 /* sel = .AABBCCDDEEFFGGH */
11500 sel = expand_simple_binop (DImode, AND, sel,
11501 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11503 NULL_RTX, 1, OPTAB_DIRECT);
11504 /* sel = ..AB..CD..EF..GH */
11505 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11506 NULL_RTX, 1, OPTAB_DIRECT);
11507 /* t_1 = ....AB..CD..EF.. */
11508 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11509 NULL_RTX, 1, OPTAB_DIRECT);
11510 /* sel = ..ABABCDCDEFEFGH */
11511 sel = expand_simple_binop (DImode, AND, sel,
11512 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11513 NULL_RTX, 1, OPTAB_DIRECT);
11514 /* sel = ....ABCD....EFGH */
11515 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11516 NULL_RTX, 1, OPTAB_DIRECT);
11517 /* t_1 = ........ABCD.... */
11518 sel = gen_lowpart (SImode, sel);
11519 t_1 = gen_lowpart (SImode, t_1);
11523 gcc_unreachable ();
11526 /* Always perform the final addition/merge within the bmask insn. */
11527 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
11530 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11533 sparc_frame_pointer_required (void)
11535 /* If the stack pointer is dynamically modified in the function, it cannot
11536 serve as the frame pointer. */
11537 if (cfun->calls_alloca)
11540 /* If the function receives nonlocal gotos, it needs to save the frame
11541 pointer in the nonlocal_goto_save_area object. */
11542 if (cfun->has_nonlocal_label)
11545 /* In flat mode, that's it. */
11549 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11550 return !(crtl->is_leaf && only_leaf_regs_used ());
11553 /* The way this is structured, we can't eliminate SFP in favor of SP
11554 if the frame pointer is required: we want to use the SFP->HFP elimination
11555 in that case. But the test in update_eliminables doesn't know we are
11556 assuming below that we only do the former elimination. */
11559 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11561 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11564 /* Return the hard frame pointer directly to bypass the stack bias. */
11567 sparc_builtin_setjmp_frame_value (void)
11569 return hard_frame_pointer_rtx;
11572 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11573 they won't be allocated. */
11576 sparc_conditional_register_usage (void)
11578 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
11580 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11581 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11583 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
11584 /* then honor it. */
11585 if (TARGET_ARCH32 && fixed_regs[5])
11587 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
11592 for (regno = SPARC_FIRST_V9_FP_REG;
11593 regno <= SPARC_LAST_V9_FP_REG;
11595 fixed_regs[regno] = 1;
11596 /* %fcc0 is used by v8 and v9. */
11597 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
11598 regno <= SPARC_LAST_V9_FCC_REG;
11600 fixed_regs[regno] = 1;
11605 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
11606 fixed_regs[regno] = 1;
11608 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
11609 /* then honor it. Likewise with g3 and g4. */
11610 if (fixed_regs[2] == 2)
11611 fixed_regs[2] = ! TARGET_APP_REGS;
11612 if (fixed_regs[3] == 2)
11613 fixed_regs[3] = ! TARGET_APP_REGS;
11614 if (TARGET_ARCH32 && fixed_regs[4] == 2)
11615 fixed_regs[4] = ! TARGET_APP_REGS;
11616 else if (TARGET_CM_EMBMEDANY)
11618 else if (fixed_regs[4] == 2)
11623 /* Disable leaf functions. */
11624 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
11625 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11626 leaf_reg_remap [regno] = regno;
11629 global_regs[SPARC_GSR_REG] = 1;
11632 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11634 - We can't load constants into FP registers.
11635 - We can't load FP constants into integer registers when soft-float,
11636 because there is no soft-float pattern with a r/F constraint.
11637 - We can't load FP constants into integer registers for TFmode unless
11638 it is 0.0L, because there is no movtf pattern with a r/F constraint.
11639 - Try and reload integer constants (symbolic or otherwise) back into
11640 registers directly, rather than having them dumped to memory. */
11643 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
11645 enum machine_mode mode = GET_MODE (x);
11646 if (CONSTANT_P (x))
11648 if (FP_REG_CLASS_P (rclass)
11649 || rclass == GENERAL_OR_FP_REGS
11650 || rclass == GENERAL_OR_EXTRA_FP_REGS
11651 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
11652 || (mode == TFmode && ! const_zero_operand (x, mode)))
11655 if (GET_MODE_CLASS (mode) == MODE_INT)
11656 return GENERAL_REGS;
11658 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11660 if (! FP_REG_CLASS_P (rclass)
11661 || !(const_zero_operand (x, mode)
11662 || const_all_ones_operand (x, mode)))
11669 && (rclass == EXTRA_FP_REGS
11670 || rclass == GENERAL_OR_EXTRA_FP_REGS))
11672 int regno = true_regnum (x);
11674 if (SPARC_INT_REG_P (regno))
11675 return (rclass == EXTRA_FP_REGS
11676 ? FP_REGS : GENERAL_OR_FP_REGS);
11682 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11683 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11686 output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
11690 gcc_assert (! TARGET_ARCH64);
11692 if (sparc_check_64 (operands[1], insn) <= 0)
11693 output_asm_insn ("srl\t%L1, 0, %L1", operands);
11694 if (which_alternative == 1)
11695 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
11696 if (GET_CODE (operands[2]) == CONST_INT)
11698 if (which_alternative == 1)
11700 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11701 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
11702 output_asm_insn (mulstr, operands);
11703 return "srlx\t%L0, 32, %H0";
11707 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11708 output_asm_insn ("or\t%L1, %3, %3", operands);
11709 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
11710 output_asm_insn (mulstr, operands);
11711 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11712 return "mov\t%3, %L0";
11715 else if (rtx_equal_p (operands[1], operands[2]))
11717 if (which_alternative == 1)
11719 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11720 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
11721 output_asm_insn (mulstr, operands);
11722 return "srlx\t%L0, 32, %H0";
11726 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11727 output_asm_insn ("or\t%L1, %3, %3", operands);
11728 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
11729 output_asm_insn (mulstr, operands);
11730 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11731 return "mov\t%3, %L0";
11734 if (sparc_check_64 (operands[2], insn) <= 0)
11735 output_asm_insn ("srl\t%L2, 0, %L2", operands);
11736 if (which_alternative == 1)
11738 output_asm_insn ("or\t%L1, %H1, %H1", operands);
11739 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
11740 output_asm_insn ("or\t%L2, %L1, %L1", operands);
11741 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
11742 output_asm_insn (mulstr, operands);
11743 return "srlx\t%L0, 32, %H0";
11747 output_asm_insn ("sllx\t%H1, 32, %3", operands);
11748 output_asm_insn ("sllx\t%H2, 32, %4", operands);
11749 output_asm_insn ("or\t%L1, %3, %3", operands);
11750 output_asm_insn ("or\t%L2, %4, %4", operands);
11751 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
11752 output_asm_insn (mulstr, operands);
11753 output_asm_insn ("srlx\t%3, 32, %H0", operands);
11754 return "mov\t%3, %L0";
11758 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11759 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
11760 and INNER_MODE are the modes describing TARGET. */
11763 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11764 enum machine_mode inner_mode)
11766 rtx t1, final_insn;
11769 t1 = gen_reg_rtx (mode);
11771 elt = convert_modes (SImode, inner_mode, elt, true);
11772 emit_move_insn (gen_lowpart(SImode, t1), elt);
11777 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11778 bmask = 0x45674567;
11781 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11782 bmask = 0x67676767;
11785 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11786 bmask = 0x77777777;
11789 gcc_unreachable ();
11792 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
11793 force_reg (SImode, GEN_INT (bmask))));
11794 emit_insn (final_insn);
11797 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11798 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
11801 vector_init_fpmerge (rtx target, rtx elt)
11803 rtx t1, t2, t2_low, t3, t3_low;
11805 t1 = gen_reg_rtx (V4QImode);
11806 elt = convert_modes (SImode, QImode, elt, true);
11807 emit_move_insn (gen_lowpart (SImode, t1), elt);
11809 t2 = gen_reg_rtx (V8QImode);
11810 t2_low = gen_lowpart (V4QImode, t2);
11811 emit_insn (gen_fpmerge_vis (t2, t1, t1));
11813 t3 = gen_reg_rtx (V8QImode);
11814 t3_low = gen_lowpart (V4QImode, t3);
11815 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
11817 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
11820 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11821 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
11824 vector_init_faligndata (rtx target, rtx elt)
11826 rtx t1 = gen_reg_rtx (V4HImode);
11829 elt = convert_modes (SImode, HImode, elt, true);
11830 emit_move_insn (gen_lowpart (SImode, t1), elt);
11832 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
11833 force_reg (SImode, GEN_INT (6)),
11836 for (i = 0; i < 4; i++)
11837 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
11840 /* Emit code to initialize TARGET to values for individual fields VALS. */
11843 sparc_expand_vector_init (rtx target, rtx vals)
11845 const enum machine_mode mode = GET_MODE (target);
11846 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
11847 const int n_elts = GET_MODE_NUNITS (mode);
11853 for (i = 0; i < n_elts; i++)
11855 rtx x = XVECEXP (vals, 0, i);
11856 if (!CONSTANT_P (x))
11859 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
11865 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
11869 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
11871 if (GET_MODE_SIZE (inner_mode) == 4)
11873 emit_move_insn (gen_lowpart (SImode, target),
11874 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
11877 else if (GET_MODE_SIZE (inner_mode) == 8)
11879 emit_move_insn (gen_lowpart (DImode, target),
11880 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
11884 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
11885 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
11887 emit_move_insn (gen_highpart (word_mode, target),
11888 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
11889 emit_move_insn (gen_lowpart (word_mode, target),
11890 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
11894 if (all_same && GET_MODE_SIZE (mode) == 8)
11898 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
11901 if (mode == V8QImode)
11903 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
11906 if (mode == V4HImode)
11908 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
11913 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
11914 for (i = 0; i < n_elts; i++)
11915 emit_move_insn (adjust_address_nv (mem, inner_mode,
11916 i * GET_MODE_SIZE (inner_mode)),
11917 XVECEXP (vals, 0, i));
11918 emit_move_insn (target, mem);
11921 /* Implement TARGET_SECONDARY_RELOAD. */
11924 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11925 enum machine_mode mode, secondary_reload_info *sri)
11927 enum reg_class rclass = (enum reg_class) rclass_i;
11929 sri->icode = CODE_FOR_nothing;
11930 sri->extra_cost = 0;
11932 /* We need a temporary when loading/storing a HImode/QImode value
11933 between memory and the FPU registers. This can happen when combine puts
11934 a paradoxical subreg in a float/fix conversion insn. */
11935 if (FP_REG_CLASS_P (rclass)
11936 && (mode == HImode || mode == QImode)
11937 && (GET_CODE (x) == MEM
11938 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
11939 && true_regnum (x) == -1)))
11940 return GENERAL_REGS;
11942 /* On 32-bit we need a temporary when loading/storing a DFmode value
11943 between unaligned memory and the upper FPU registers. */
11945 && rclass == EXTRA_FP_REGS
11947 && GET_CODE (x) == MEM
11948 && ! mem_min_alignment (x, 8))
11951 if (((TARGET_CM_MEDANY
11952 && symbolic_operand (x, mode))
11953 || (TARGET_CM_EMBMEDANY
11954 && text_segment_operand (x, mode)))
11958 sri->icode = direct_optab_handler (reload_in_optab, mode);
11960 sri->icode = direct_optab_handler (reload_out_optab, mode);
11964 if (TARGET_VIS3 && TARGET_ARCH32)
11966 int regno = true_regnum (x);
11968 /* When using VIS3 fp<-->int register moves, on 32-bit we have
11969 to move 8-byte values in 4-byte pieces. This only works via
11970 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
11971 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
11972 an FP_REGS intermediate move. */
11973 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
11974 || ((general_or_i64_p (rclass)
11975 || rclass == GENERAL_OR_FP_REGS)
11976 && SPARC_FP_REG_P (regno)))
11978 sri->extra_cost = 2;
11986 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
11987 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
11990 sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
11992 enum rtx_code rc = GET_CODE (operands[1]);
11993 enum machine_mode cmp_mode;
11994 rtx cc_reg, dst, cmp;
11997 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12000 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12001 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12003 cmp_mode = GET_MODE (XEXP (cmp, 0));
12004 rc = GET_CODE (cmp);
12007 if (! rtx_equal_p (operands[2], dst)
12008 && ! rtx_equal_p (operands[3], dst))
12010 if (reg_overlap_mentioned_p (dst, cmp))
12011 dst = gen_reg_rtx (mode);
12013 emit_move_insn (dst, operands[3]);
12015 else if (operands[2] == dst)
12017 operands[2] = operands[3];
12019 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12020 rc = reverse_condition_maybe_unordered (rc);
12022 rc = reverse_condition (rc);
12025 if (XEXP (cmp, 1) == const0_rtx
12026 && GET_CODE (XEXP (cmp, 0)) == REG
12027 && cmp_mode == DImode
12028 && v9_regcmp_p (rc))
12029 cc_reg = XEXP (cmp, 0);
12031 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12033 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12035 emit_insn (gen_rtx_SET (VOIDmode, dst,
12036 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12038 if (dst != operands[0])
12039 emit_move_insn (operands[0], dst);
12044 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12045 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12046 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12047 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12048 code to be used for the condition mask. */
12051 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12053 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12054 enum rtx_code code = GET_CODE (operands[3]);
12056 mask = gen_reg_rtx (Pmode);
12057 cop0 = operands[4];
12058 cop1 = operands[5];
12059 if (code == LT || code == GE)
12063 code = swap_condition (code);
12064 t = cop0; cop0 = cop1; cop1 = t;
12067 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12069 fcmp = gen_rtx_UNSPEC (Pmode,
12070 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12073 cmask = gen_rtx_UNSPEC (DImode,
12074 gen_rtvec (2, mask, gsr),
12077 bshuf = gen_rtx_UNSPEC (mode,
12078 gen_rtvec (3, operands[1], operands[2], gsr),
12081 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12082 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12084 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12087 /* On sparc, any mode which naturally allocates into the float
12088 registers should return 4 here. */
12091 sparc_regmode_natural_size (enum machine_mode mode)
12093 int size = UNITS_PER_WORD;
12097 enum mode_class mclass = GET_MODE_CLASS (mode);
12099 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12106 /* Return TRUE if it is a good idea to tie two pseudo registers
12107 when one has mode MODE1 and one has mode MODE2.
12108 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12109 for any hard reg, then this must be FALSE for correct output.
12111 For V9 we have to deal with the fact that only the lower 32 floating
12112 point registers are 32-bit addressable. */
12115 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12117 enum mode_class mclass1, mclass2;
12118 unsigned short size1, size2;
12120 if (mode1 == mode2)
12123 mclass1 = GET_MODE_CLASS (mode1);
12124 mclass2 = GET_MODE_CLASS (mode2);
12125 if (mclass1 != mclass2)
12131 /* Classes are the same and we are V9 so we have to deal with upper
12132 vs. lower floating point registers. If one of the modes is a
12133 4-byte mode, and the other is not, we have to mark them as not
12134 tieable because only the lower 32 floating point register are
12135 addressable 32-bits at a time.
12137 We can't just test explicitly for SFmode, otherwise we won't
12138 cover the vector mode cases properly. */
12140 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12143 size1 = GET_MODE_SIZE (mode1);
12144 size2 = GET_MODE_SIZE (mode2);
12145 if ((size1 > 4 && size2 == 4)
12146 || (size2 > 4 && size1 == 4))
12152 static enum machine_mode sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12154 return (TARGET_ARCH64 ? DImode : SImode);
12157 #include "gt-sparc.h"