1 /* Copyright (C) 2006 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 2 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with this file; see the file COPYING. If not, write to the Free
15 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 #include "coretypes.h"
24 #include "hard-reg-set.h"
26 #include "insn-config.h"
27 #include "conditions.h"
28 #include "insn-attr.h"
38 #include "basic-block.h"
39 #include "integrate.h"
45 #include "target-def.h"
46 #include "langhooks.h"
48 #include "cfglayout.h"
49 #include "sched-int.h"
54 #include "tree-gimple.h"
55 #include "tm-constrs.h"
56 #include "spu-builtins.h"
58 /* Target specific attribute specifications. */
59 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
61 /* Prototypes and external defs. */
62 static void spu_init_builtins (void);
63 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
64 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
65 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
66 static rtx get_pic_reg (void);
67 static int need_to_save_reg (int regno, int saving);
68 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
69 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
70 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
72 static void emit_nop_for_insn (rtx insn);
73 static bool insn_clobbers_hbr (rtx insn);
74 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
76 static rtx get_branch_target (rtx branch);
77 static void insert_branch_hints (void);
78 static void insert_nops (void);
79 static void spu_machine_dependent_reorg (void);
80 static int spu_sched_issue_rate (void);
81 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
83 static int get_pipe (rtx insn);
84 static int spu_sched_adjust_priority (rtx insn, int pri);
85 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
86 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
88 unsigned char *no_add_attrs);
89 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
91 unsigned char *no_add_attrs);
92 static int spu_naked_function_p (tree func);
93 static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
94 tree type, unsigned char named);
95 static tree spu_build_builtin_va_list (void);
96 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
98 static int regno_aligned_for_load (int regno);
99 static int store_with_one_insn_p (rtx mem);
100 static int reg_align (rtx reg);
101 static int mem_is_padded_component_ref (rtx x);
102 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
103 static void spu_asm_globalize_label (FILE * file, const char *name);
104 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
106 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
107 static void spu_init_libfuncs (void);
108 static bool spu_return_in_memory (tree type, tree fntype);
110 extern const char *reg_names[];
111 rtx spu_compare_op0, spu_compare_op1;
125 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
126 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
128 /* Built in types. */
129 tree spu_builtin_types[SPU_BTI_MAX];
131 /* TARGET overrides. */
133 #undef TARGET_INIT_BUILTINS
134 #define TARGET_INIT_BUILTINS spu_init_builtins
136 #undef TARGET_RESOLVE_OVERLOADED_BUILTIN
137 #define TARGET_RESOLVE_OVERLOADED_BUILTIN spu_resolve_overloaded_builtin
139 #undef TARGET_EXPAND_BUILTIN
140 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
142 #undef TARGET_EH_RETURN_FILTER_MODE
143 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
145 /* The .8byte directive doesn't seem to work well for a 32 bit
147 #undef TARGET_ASM_UNALIGNED_DI_OP
148 #define TARGET_ASM_UNALIGNED_DI_OP NULL
150 #undef TARGET_RTX_COSTS
151 #define TARGET_RTX_COSTS spu_rtx_costs
153 #undef TARGET_ADDRESS_COST
154 #define TARGET_ADDRESS_COST hook_int_rtx_0
156 #undef TARGET_SCHED_ISSUE_RATE
157 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
159 #undef TARGET_SCHED_VARIABLE_ISSUE
160 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
162 #undef TARGET_SCHED_ADJUST_PRIORITY
163 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
165 #undef TARGET_SCHED_ADJUST_COST
166 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
168 const struct attribute_spec spu_attribute_table[];
169 #undef TARGET_ATTRIBUTE_TABLE
170 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
172 #undef TARGET_ASM_INTEGER
173 #define TARGET_ASM_INTEGER spu_assemble_integer
175 #undef TARGET_SCALAR_MODE_SUPPORTED_P
176 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
178 #undef TARGET_VECTOR_MODE_SUPPORTED_P
179 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
181 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
182 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
184 #undef TARGET_ASM_GLOBALIZE_LABEL
185 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
187 #undef TARGET_PASS_BY_REFERENCE
188 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
190 #undef TARGET_MUST_PASS_IN_STACK
191 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
193 #undef TARGET_BUILD_BUILTIN_VA_LIST
194 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
196 #undef TARGET_SETUP_INCOMING_VARARGS
197 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
199 #undef TARGET_MACHINE_DEPENDENT_REORG
200 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
202 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
203 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
205 #undef TARGET_DEFAULT_TARGET_FLAGS
206 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
208 #undef TARGET_INIT_LIBFUNCS
209 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
211 #undef TARGET_RETURN_IN_MEMORY
212 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
214 struct gcc_target targetm = TARGET_INITIALIZER;
217 spu_cpu_cpp_builtins (struct cpp_reader *pfile)
219 extern void builtin_define_std (const char *);
220 builtin_define_std ("__SPU__");
221 cpp_assert (pfile, "cpu=spu");
222 cpp_assert (pfile, "machine=spu");
223 builtin_define_std ("__vector=__attribute__((__spu_vector__))");
226 /* Sometimes certain combinations of command options do not make sense
227 on a particular target machine. You can define a macro
228 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
229 executed once just after all the command options have been parsed. */
231 spu_override_options (void)
234 /* Don't give warnings about the main() function. */
237 /* Override some of the default param values. With so many registers
238 larger values are better for these params. */
239 if (MAX_UNROLLED_INSNS == 100)
240 MAX_UNROLLED_INSNS = 250;
241 if (MAX_PENDING_LIST_LENGTH == 32)
242 MAX_PENDING_LIST_LENGTH = 128;
244 flag_omit_frame_pointer = 1;
246 if (align_functions < 8)
250 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
251 struct attribute_spec.handler. */
253 /* Table of machine attributes. */
254 const struct attribute_spec spu_attribute_table[] =
256 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
257 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
258 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
259 { NULL, 0, 0, false, false, false, NULL }
262 /* True if MODE is valid for the target. By "valid", we mean able to
263 be manipulated in non-trivial ways. In particular, this means all
264 the arithmetic is supported. */
266 spu_scalar_mode_supported_p (enum machine_mode mode)
284 /* Similarly for vector modes. "Supported" here is less strict. At
285 least some operations are supported; need to check optabs or builtins
286 for further details. */
288 spu_vector_mode_supported_p (enum machine_mode mode)
305 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
306 least significant bytes of the outer mode. This function returns
307 TRUE for the SUBREG's where this is correct. */
309 valid_subreg (rtx op)
311 enum machine_mode om = GET_MODE (op);
312 enum machine_mode im = GET_MODE (SUBREG_REG (op));
313 return om != VOIDmode && im != VOIDmode
314 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
315 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
318 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
319 and ajust the start offset. */
321 adjust_operand (rtx op, HOST_WIDE_INT * start)
323 enum machine_mode mode;
325 /* Strip any SUBREG */
326 if (GET_CODE (op) == SUBREG)
330 GET_MODE_BITSIZE (GET_MODE (op)) -
331 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
332 op = SUBREG_REG (op);
334 /* If it is smaller than SI, assure a SUBREG */
335 op_size = GET_MODE_BITSIZE (GET_MODE (op));
339 *start += 32 - op_size;
342 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
343 mode = mode_for_size (op_size, MODE_INT, 0);
344 if (mode != GET_MODE (op))
345 op = gen_rtx_SUBREG (mode, op, 0);
350 spu_expand_extv (rtx ops[], int unsignedp)
352 HOST_WIDE_INT width = INTVAL (ops[2]);
353 HOST_WIDE_INT start = INTVAL (ops[3]);
354 HOST_WIDE_INT src_size, dst_size;
355 enum machine_mode src_mode, dst_mode;
356 rtx dst = ops[0], src = ops[1];
359 dst = adjust_operand (ops[0], 0);
360 dst_mode = GET_MODE (dst);
361 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
363 if (GET_CODE (ops[1]) == MEM)
365 if (start + width > MEM_ALIGN (ops[1]))
367 rtx addr = gen_reg_rtx (SImode);
368 rtx shl = gen_reg_rtx (SImode);
369 rtx shr = gen_reg_rtx (SImode);
370 rtx w0 = gen_reg_rtx (TImode);
371 rtx w1 = gen_reg_rtx (TImode);
373 src = gen_reg_rtx (TImode);
374 emit_move_insn (addr, copy_rtx (XEXP (ops[1], 0)));
375 a0 = memory_address (TImode, addr);
376 a1 = memory_address (TImode, plus_constant (addr, 16));
377 emit_insn (gen_lq (w0, a0));
378 emit_insn (gen_lq (w1, a1));
379 emit_insn (gen_andsi3 (shl, addr, GEN_INT (15)));
380 emit_insn (gen_iorsi3 (shr, addr, GEN_INT (16)));
381 emit_insn (gen_shlqby_ti (w0, w0, shl));
382 emit_insn (gen_rotqmby_ti (w1, w1, shr));
383 emit_insn (gen_iorti3 (src, w0, w1));
387 rtx addr = gen_reg_rtx (SImode);
389 emit_move_insn (addr, copy_rtx (XEXP (ops[1], 0)));
390 a0 = memory_address (TImode, addr);
391 src = gen_reg_rtx (TImode);
392 emit_insn (gen_lq (src, a0));
393 if (MEM_ALIGN (ops[1]) < 128)
396 src = gen_reg_rtx (TImode);
397 emit_insn (gen_rotqby_ti (src, t, addr));
400 /* Shifts in SImode are faster, use them if we can. */
401 if (start + width < 32)
404 src = gen_reg_rtx (SImode);
405 emit_insn (gen_spu_convert (src, t));
409 src = adjust_operand (src, &start);
410 src_mode = GET_MODE (src);
411 src_size = GET_MODE_BITSIZE (GET_MODE (src));
415 s = gen_reg_rtx (src_mode);
419 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
422 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
425 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
433 if (width < src_size)
440 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
443 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
446 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
451 s = gen_reg_rtx (src_mode);
452 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
457 convert_move (dst, src, unsignedp);
461 spu_expand_insv (rtx ops[])
463 HOST_WIDE_INT width = INTVAL (ops[1]);
464 HOST_WIDE_INT start = INTVAL (ops[2]);
465 HOST_WIDE_INT maskbits;
466 enum machine_mode dst_mode, src_mode;
467 rtx dst = ops[0], src = ops[3];
468 int dst_size, src_size;
474 if (GET_CODE (ops[0]) == MEM)
475 dst = gen_reg_rtx (TImode);
477 dst = adjust_operand (dst, &start);
478 dst_mode = GET_MODE (dst);
479 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
481 if (CONSTANT_P (src))
483 enum machine_mode m =
484 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
485 src = force_reg (m, convert_to_mode (m, src, 0));
487 src = adjust_operand (src, 0);
488 src_mode = GET_MODE (src);
489 src_size = GET_MODE_BITSIZE (GET_MODE (src));
491 mask = gen_reg_rtx (dst_mode);
492 shift_reg = gen_reg_rtx (dst_mode);
493 shift = dst_size - start - width;
495 /* It's not safe to use subreg here because the compiler assumes
496 that the SUBREG_REG is right justified in the SUBREG. */
497 convert_move (shift_reg, src, 1);
504 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
507 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
510 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
522 maskbits = (-1ll << (32 - width - start));
524 maskbits += (1ll << (32 - start));
525 emit_move_insn (mask, GEN_INT (maskbits));
528 maskbits = (-1ll << (64 - width - start));
530 maskbits += (1ll << (64 - start));
531 emit_move_insn (mask, GEN_INT (maskbits));
535 unsigned char arr[16];
537 memset (arr, 0, sizeof (arr));
538 arr[i] = 0xff >> (start & 7);
539 for (i++; i <= (start + width - 1) / 8; i++)
541 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
542 emit_move_insn (mask, array_to_constant (TImode, arr));
548 if (GET_CODE (ops[0]) == MEM)
550 rtx aligned = gen_reg_rtx (SImode);
551 rtx low = gen_reg_rtx (SImode);
552 rtx addr = gen_reg_rtx (SImode);
553 rtx rotl = gen_reg_rtx (SImode);
554 rtx mask0 = gen_reg_rtx (TImode);
557 emit_move_insn (addr, XEXP (ops[0], 0));
558 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
559 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
560 emit_insn (gen_negsi2 (rotl, low));
561 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
562 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
563 mem = change_address (ops[0], TImode, aligned);
564 set_mem_alias_set (mem, 0);
565 emit_move_insn (dst, mem);
566 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
567 emit_move_insn (mem, dst);
568 if (start + width > MEM_ALIGN (ops[0]))
570 rtx shl = gen_reg_rtx (SImode);
571 rtx mask1 = gen_reg_rtx (TImode);
572 rtx dst1 = gen_reg_rtx (TImode);
574 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
575 emit_insn (gen_shlqby_ti (mask1, mask, shl));
576 mem1 = adjust_address (mem, TImode, 16);
577 set_mem_alias_set (mem1, 0);
578 emit_move_insn (dst1, mem1);
579 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
580 emit_move_insn (mem1, dst1);
584 emit_insn (gen_selb (dst, dst, shift_reg, mask));
589 spu_expand_block_move (rtx ops[])
591 HOST_WIDE_INT bytes, align, offset;
592 rtx src, dst, sreg, dreg, target;
594 if (GET_CODE (ops[2]) != CONST_INT
595 || GET_CODE (ops[3]) != CONST_INT
596 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
599 bytes = INTVAL (ops[2]);
600 align = INTVAL (ops[3]);
610 for (offset = 0; offset + 16 <= bytes; offset += 16)
612 dst = adjust_address (ops[0], V16QImode, offset);
613 src = adjust_address (ops[1], V16QImode, offset);
614 emit_move_insn (dst, src);
619 unsigned char arr[16] = { 0 };
620 for (i = 0; i < bytes - offset; i++)
622 dst = adjust_address (ops[0], V16QImode, offset);
623 src = adjust_address (ops[1], V16QImode, offset);
624 mask = gen_reg_rtx (V16QImode);
625 sreg = gen_reg_rtx (V16QImode);
626 dreg = gen_reg_rtx (V16QImode);
627 target = gen_reg_rtx (V16QImode);
628 emit_move_insn (mask, array_to_constant (V16QImode, arr));
629 emit_move_insn (dreg, dst);
630 emit_move_insn (sreg, src);
631 emit_insn (gen_selb (target, dreg, sreg, mask));
632 emit_move_insn (dst, target);
640 { SPU_EQ, SPU_GT, SPU_GTU };
643 int spu_comp_icode[8][3] = {
644 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
645 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
646 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
647 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
648 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
649 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
651 {CODE_FOR_ceq_vec, 0, 0},
654 /* Generate a compare for CODE. Return a brand-new rtx that represents
655 the result of the compare. GCC can figure this out too if we don't
656 provide all variations of compares, but GCC always wants to use
657 WORD_MODE, we can generate better code in most cases if we do it
660 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
662 int reverse_compare = 0;
663 int reverse_test = 0;
666 rtx target = operands[0];
667 enum machine_mode comp_mode;
668 enum machine_mode op_mode;
669 enum spu_comp_code scode;
672 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
673 and so on, to keep the constant in operand 1. */
674 if (GET_CODE (spu_compare_op1) == CONST_INT)
676 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
677 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
681 spu_compare_op1 = GEN_INT (val);
685 spu_compare_op1 = GEN_INT (val);
689 spu_compare_op1 = GEN_INT (val);
693 spu_compare_op1 = GEN_INT (val);
754 op_mode = GET_MODE (spu_compare_op0);
793 if (GET_MODE (spu_compare_op1) == DFmode)
795 rtx reg = gen_reg_rtx (DFmode);
796 if (!flag_unsafe_math_optimizations
797 || (scode != SPU_GT && scode != SPU_EQ))
800 emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
802 emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
804 spu_compare_op0 = reg;
805 spu_compare_op1 = CONST0_RTX (DFmode);
808 if (is_set == 0 && spu_compare_op1 == const0_rtx
809 && (GET_MODE (spu_compare_op0) == SImode
810 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
812 /* Don't need to set a register with the result when we are
813 comparing against zero and branching. */
814 reverse_test = !reverse_test;
815 compare_result = spu_compare_op0;
819 compare_result = gen_reg_rtx (comp_mode);
823 rtx t = spu_compare_op1;
824 spu_compare_op1 = spu_compare_op0;
828 if (spu_comp_icode[index][scode] == 0)
831 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
832 (spu_compare_op0, op_mode))
833 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
834 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
835 (spu_compare_op1, op_mode))
836 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
837 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
842 emit_insn (comp_rtx);
851 /* We don't have branch on QI compare insns, so we convert the
852 QI compare result to a HI result. */
853 if (comp_mode == QImode)
855 rtx old_res = compare_result;
856 compare_result = gen_reg_rtx (HImode);
858 emit_insn (gen_extendqihi2 (compare_result, old_res));
862 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
864 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
866 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
867 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
868 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
871 else if (is_set == 2)
873 int compare_size = GET_MODE_BITSIZE (comp_mode);
874 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
875 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
877 rtx op_t = operands[2];
878 rtx op_f = operands[3];
880 /* The result of the comparison can be SI, HI or QI mode. Create a
881 mask based on that result. */
882 if (target_size > compare_size)
884 select_mask = gen_reg_rtx (mode);
885 emit_insn (gen_extend_compare (select_mask, compare_result));
887 else if (target_size < compare_size)
889 gen_rtx_SUBREG (mode, compare_result,
890 (compare_size - target_size) / BITS_PER_UNIT);
891 else if (comp_mode != mode)
892 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
894 select_mask = compare_result;
896 if (GET_MODE (target) != GET_MODE (op_t)
897 || GET_MODE (target) != GET_MODE (op_f))
901 emit_insn (gen_selb (target, op_t, op_f, select_mask));
903 emit_insn (gen_selb (target, op_f, op_t, select_mask));
908 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
909 gen_rtx_NOT (comp_mode, compare_result)));
910 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
911 emit_insn (gen_extendhisi2 (target, compare_result));
912 else if (GET_MODE (target) == SImode
913 && GET_MODE (compare_result) == QImode)
914 emit_insn (gen_extend_compare (target, compare_result));
916 emit_move_insn (target, compare_result);
921 const_double_to_hwint (rtx x)
925 if (GET_MODE (x) == SFmode)
927 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
928 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
930 else if (GET_MODE (x) == DFmode)
933 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
934 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
936 val = (val << 32) | (l[1] & 0xffffffff);
944 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
948 gcc_assert (mode == SFmode || mode == DFmode);
951 tv[0] = (v << 32) >> 32;
952 else if (mode == DFmode)
954 tv[1] = (v << 32) >> 32;
957 real_from_target (&rv, tv, mode);
958 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
962 print_operand_address (FILE * file, register rtx addr)
967 switch (GET_CODE (addr))
970 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
974 reg = XEXP (addr, 0);
975 offset = XEXP (addr, 1);
976 if (GET_CODE (offset) == REG)
978 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
979 reg_names[REGNO (offset)]);
981 else if (GET_CODE (offset) == CONST_INT)
983 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
984 INTVAL (offset), reg_names[REGNO (reg)]);
994 output_addr_const (file, addr);
1004 print_operand (FILE * file, rtx x, int code)
1006 enum machine_mode mode = GET_MODE (x);
1008 unsigned char arr[16];
1009 int xcode = GET_CODE (x);
1010 if (GET_MODE (x) == VOIDmode)
1013 case 'H': /* 128 bits, signed */
1014 case 'L': /* 128 bits, signed */
1015 case 'm': /* 128 bits, signed */
1016 case 'T': /* 128 bits, signed */
1017 case 't': /* 128 bits, signed */
1020 case 'G': /* 64 bits, signed */
1021 case 'K': /* 64 bits, signed */
1022 case 'k': /* 64 bits, signed */
1023 case 'D': /* 64 bits, signed */
1024 case 'd': /* 64 bits, signed */
1027 case 'F': /* 32 bits, signed */
1028 case 'J': /* 32 bits, signed */
1029 case 'j': /* 32 bits, signed */
1030 case 's': /* 32 bits, signed */
1031 case 'S': /* 32 bits, signed */
1038 case 'j': /* 32 bits, signed */
1039 case 'k': /* 64 bits, signed */
1040 case 'm': /* 128 bits, signed */
1041 if (xcode == CONST_INT
1042 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1044 gcc_assert (logical_immediate_p (x, mode));
1045 constant_to_array (mode, x, arr);
1046 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1047 val = trunc_int_for_mode (val, SImode);
1048 switch (which_logical_immediate (val))
1053 fprintf (file, "h");
1056 fprintf (file, "b");
1066 case 'J': /* 32 bits, signed */
1067 case 'K': /* 64 bits, signed */
1068 case 'L': /* 128 bits, signed */
1069 if (xcode == CONST_INT
1070 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1072 gcc_assert (logical_immediate_p (x, mode)
1073 || iohl_immediate_p (x, mode));
1074 constant_to_array (mode, x, arr);
1075 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1076 val = trunc_int_for_mode (val, SImode);
1077 switch (which_logical_immediate (val))
1083 val = trunc_int_for_mode (val, HImode);
1086 val = trunc_int_for_mode (val, QImode);
1091 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1097 case 't': /* 128 bits, signed */
1098 case 'd': /* 64 bits, signed */
1099 case 's': /* 32 bits, signed */
1100 if (xcode == CONST_INT
1101 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1103 gcc_assert (immediate_load_p (x, mode));
1104 constant_to_array (mode, x, arr);
1105 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1106 val = trunc_int_for_mode (val, SImode);
1107 switch (which_immediate_load (val))
1112 fprintf (file, "a");
1115 fprintf (file, "h");
1118 fprintf (file, "hu");
1124 else if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1125 fprintf (file, "a");
1130 case 'T': /* 128 bits, signed */
1131 case 'D': /* 64 bits, signed */
1132 case 'S': /* 32 bits, signed */
1133 if (xcode == CONST_INT
1134 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1136 gcc_assert (immediate_load_p (x, mode));
1137 constant_to_array (mode, x, arr);
1138 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1139 val = trunc_int_for_mode (val, SImode);
1140 switch (which_immediate_load (val))
1147 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1152 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1154 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1155 output_addr_const (file, x);
1163 if (xcode == CONST_INT
1164 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1165 { /* immediate operand for fsmbi */
1167 HOST_WIDE_INT val = 0;
1168 unsigned char arr[16];
1169 constant_to_array (mode, x, arr);
1170 for (i = 0; i < 16; i++)
1175 print_operand (file, GEN_INT (val), 0);
1182 if (xcode == CONST_INT)
1184 /* Only 4 least significant bits are relevant for generate
1185 control word instructions. */
1186 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1191 case 'M': /* print code for c*d */
1192 if (GET_CODE (x) == CONST_INT)
1196 fprintf (file, "b");
1199 fprintf (file, "h");
1202 fprintf (file, "w");
1205 fprintf (file, "d");
1214 case 'N': /* Negate the operand */
1215 if (xcode == CONST_INT)
1216 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1217 else if (xcode == CONST_VECTOR)
1218 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1219 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1222 case 'I': /* enable/disable interrupts */
1223 if (xcode == CONST_INT)
1224 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1227 case 'b': /* branch modifiers */
1229 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1230 else if (COMPARISON_P (x))
1231 fprintf (file, "%s", xcode == NE ? "n" : "");
1234 case 'i': /* indirect call */
1237 if (GET_CODE (XEXP (x, 0)) == REG)
1238 /* Used in indirect function calls. */
1239 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1241 output_address (XEXP (x, 0));
1245 case 'p': /* load/store */
1249 xcode = GET_CODE (x);
1252 fprintf (file, "d");
1253 else if (xcode == CONST_INT)
1254 fprintf (file, "a");
1255 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1256 fprintf (file, "r");
1257 else if (xcode == PLUS || xcode == LO_SUM)
1259 if (GET_CODE (XEXP (x, 1)) == REG)
1260 fprintf (file, "x");
1262 fprintf (file, "d");
1268 fprintf (file, "%s", reg_names[REGNO (x)]);
1269 else if (xcode == MEM)
1270 output_address (XEXP (x, 0));
1271 else if (xcode == CONST_VECTOR)
1272 output_addr_const (file, CONST_VECTOR_ELT (x, 0));
1274 output_addr_const (file, x);
1278 output_operand_lossage ("invalid %%xn code");
1283 extern char call_used_regs[];
1284 extern char regs_ever_live[];
1286 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1287 caller saved register. For leaf functions it is more efficient to
1288 use a volatile register because we won't need to save and restore the
1289 pic register. This routine is only valid after register allocation
1290 is completed, so we can pick an unused register. */
1294 rtx pic_reg = pic_offset_table_rtx;
1295 if (!reload_completed && !reload_in_progress)
1300 /* SAVING is TRUE when we are generating the actual load and store
1301 instructions for REGNO. When determining the size of the stack
1302 needed for saving register we must allocate enough space for the
1303 worst case, because we don't always have the information early enough
1304 to not allocate it. But we can at least eliminate the actual loads
1305 and stores during the prologue/epilogue. */
1307 need_to_save_reg (int regno, int saving)
1309 if (regs_ever_live[regno] && !call_used_regs[regno])
1312 && regno == PIC_OFFSET_TABLE_REGNUM
1313 && (!saving || current_function_uses_pic_offset_table)
1315 || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM]))
1320 /* This function is only correct starting with local register
1323 spu_saved_regs_size (void)
1325 int reg_save_size = 0;
1328 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1329 if (need_to_save_reg (regno, 0))
1330 reg_save_size += 0x10;
1331 return reg_save_size;
1335 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1337 rtx reg = gen_rtx_REG (V4SImode, regno);
1339 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1340 return emit_insn (gen_movv4si (mem, reg));
1344 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1346 rtx reg = gen_rtx_REG (V4SImode, regno);
1348 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1349 return emit_insn (gen_movv4si (reg, mem));
1352 /* This happens after reload, so we need to expand it. */
1354 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1357 if (satisfies_constraint_K (GEN_INT (imm)))
1359 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1363 insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1364 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1366 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1367 if (REGNO (src) == REGNO (scratch))
1370 if (REGNO (dst) == REGNO (scratch))
1371 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1376 /* Return nonzero if this function is known to have a null epilogue. */
1379 direct_return (void)
1381 if (reload_completed)
1383 if (cfun->static_chain_decl == 0
1384 && (spu_saved_regs_size ()
1386 + current_function_outgoing_args_size
1387 + current_function_pretend_args_size == 0)
1388 && current_function_is_leaf)
1395 The stack frame looks like this:
1402 prev SP | back chain |
1405 | reg save | current_function_pretend_args_size bytes
1408 | saved regs | spu_saved_regs_size() bytes
1411 FP | vars | get_frame_size() bytes
1415 | args | current_function_outgoing_args_size bytes
1425 spu_expand_prologue (void)
1427 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1428 HOST_WIDE_INT total_size;
1429 HOST_WIDE_INT saved_regs_size;
1430 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1431 rtx scratch_reg_0, scratch_reg_1;
1434 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1435 the "toplevel" insn chain. */
1436 emit_note (NOTE_INSN_DELETED);
1438 if (flag_pic && optimize == 0)
1439 current_function_uses_pic_offset_table = 1;
1441 if (spu_naked_function_p (current_function_decl))
1444 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1445 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1447 saved_regs_size = spu_saved_regs_size ();
1448 total_size = size + saved_regs_size
1449 + current_function_outgoing_args_size
1450 + current_function_pretend_args_size;
1452 if (!current_function_is_leaf
1453 || current_function_calls_alloca || total_size > 0)
1454 total_size += STACK_POINTER_OFFSET;
1456 /* Save this first because code after this might use the link
1457 register as a scratch register. */
1458 if (!current_function_is_leaf)
1460 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1461 RTX_FRAME_RELATED_P (insn) = 1;
1466 offset = -current_function_pretend_args_size;
1467 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1468 if (need_to_save_reg (regno, 1))
1471 insn = frame_emit_store (regno, sp_reg, offset);
1472 RTX_FRAME_RELATED_P (insn) = 1;
1476 if (flag_pic && current_function_uses_pic_offset_table)
1478 rtx pic_reg = get_pic_reg ();
1479 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1480 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1482 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1483 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
1489 if (flag_stack_check)
1491 /* We compare agains total_size-1 because
1492 ($sp >= total_size) <=> ($sp > total_size-1) */
1493 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1494 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1495 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1496 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1498 emit_move_insn (scratch_v4si, size_v4si);
1499 size_v4si = scratch_v4si;
1501 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1502 emit_insn (gen_vec_extractv4si
1503 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1504 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1507 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1508 the value of the previous $sp because we save it as the back
1510 if (total_size <= 2000)
1512 /* In this case we save the back chain first. */
1513 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1514 RTX_FRAME_RELATED_P (insn) = 1;
1516 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1518 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1520 insn = emit_move_insn (scratch_reg_0, sp_reg);
1521 RTX_FRAME_RELATED_P (insn) = 1;
1523 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1527 insn = emit_move_insn (scratch_reg_0, sp_reg);
1528 RTX_FRAME_RELATED_P (insn) = 1;
1530 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1532 RTX_FRAME_RELATED_P (insn) = 1;
1533 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1535 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1537 if (total_size > 2000)
1539 /* Save the back chain ptr */
1540 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1541 RTX_FRAME_RELATED_P (insn) = 1;
1544 if (frame_pointer_needed)
1546 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1547 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1548 + current_function_outgoing_args_size;
1549 /* Set the new frame_pointer */
1550 frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1554 emit_note (NOTE_INSN_DELETED);
1558 spu_expand_epilogue (bool sibcall_p)
1560 int size = get_frame_size (), offset, regno;
1561 HOST_WIDE_INT saved_regs_size, total_size;
1562 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1563 rtx jump, scratch_reg_0;
1565 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1566 the "toplevel" insn chain. */
1567 emit_note (NOTE_INSN_DELETED);
1569 if (spu_naked_function_p (current_function_decl))
1572 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1574 saved_regs_size = spu_saved_regs_size ();
1575 total_size = size + saved_regs_size
1576 + current_function_outgoing_args_size
1577 + current_function_pretend_args_size;
1579 if (!current_function_is_leaf
1580 || current_function_calls_alloca || total_size > 0)
1581 total_size += STACK_POINTER_OFFSET;
1585 if (current_function_calls_alloca)
1586 /* Load it from the back chain because our save_stack_block and
1587 restore_stack_block do nothing. */
1588 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1590 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1593 if (saved_regs_size > 0)
1595 offset = -current_function_pretend_args_size;
1596 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1597 if (need_to_save_reg (regno, 1))
1600 frame_emit_load (regno, sp_reg, offset);
1605 if (!current_function_is_leaf)
1606 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1610 emit_insn (gen_rtx_USE
1611 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
1612 jump = emit_jump_insn (gen__return ());
1613 emit_barrier_after (jump);
1616 emit_note (NOTE_INSN_DELETED);
1620 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1624 /* This is inefficient because it ends up copying to a save-register
1625 which then gets saved even though $lr has already been saved. But
1626 it does generate better code for leaf functions and we don't need
1627 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1628 used for __builtin_return_address anyway, so maybe we don't care if
1629 it's inefficient. */
1630 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1634 /* Given VAL, generate a constant appropriate for MODE.
1635 If MODE is a vector mode, every element will be VAL.
1636 For TImode, VAL will be zero extended to 128 bits. */
1638 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1644 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1645 || GET_MODE_CLASS (mode) == MODE_FLOAT
1646 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1647 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1649 if (GET_MODE_CLASS (mode) == MODE_INT)
1650 return immed_double_const (val, 0, mode);
1652 /* val is the bit representation of the float */
1653 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1654 return hwint_to_const_double (mode, val);
1656 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1657 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1659 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1661 units = GET_MODE_NUNITS (mode);
1663 v = rtvec_alloc (units);
1665 for (i = 0; i < units; ++i)
1666 RTVEC_ELT (v, i) = inner;
1668 return gen_rtx_CONST_VECTOR (mode, v);
1671 /* branch hint stuff */
1673 /* The hardware requires 8 insns between a hint and the branch it
1674 effects. This variable describes how many rtl instructions the
1675 compiler needs to see before inserting a hint. (FIXME: We should
1676 accept less and insert nops to enforce it because hinting is always
1677 profitable for performance, but we do need to be careful of code
1679 int spu_hint_dist = (8 * 4);
1681 /* An array of these is used to propagate hints to predecessor blocks. */
1684 rtx prop_jump; /* propogated from another block */
1685 basic_block bb; /* the orignal block. */
1688 /* The special $hbr register is used to prevent the insn scheduler from
1689 moving hbr insns across instructions which invalidate them. It
1690 should only be used in a clobber, and this function searches for
1691 insns which clobber it. */
1693 insn_clobbers_hbr (rtx insn)
1695 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
1697 rtx parallel = PATTERN (insn);
1700 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
1702 clobber = XVECEXP (parallel, 0, j);
1703 if (GET_CODE (clobber) == CLOBBER
1704 && GET_CODE (XEXP (clobber, 0)) == REG
1705 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
1713 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
1716 rtx hint, insn, prev, next;
1718 if (before == 0 || branch == 0 || target == 0)
1725 branch_label = gen_label_rtx ();
1726 LABEL_NUSES (branch_label)++;
1727 LABEL_PRESERVE_P (branch_label) = 1;
1728 insn = emit_label_before (branch_label, branch);
1729 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
1731 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1732 the current insn is pipe0, dual issue with it. */
1733 prev = prev_active_insn (before);
1734 if (prev && get_pipe (prev) == 0)
1735 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1736 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
1738 next = next_active_insn (before);
1739 hint = emit_insn_after (gen_hbr (branch_label, target), before);
1741 PUT_MODE (next, TImode);
1745 hint = emit_insn_before (gen_hbr (branch_label, target), before);
1746 PUT_MODE (hint, TImode);
1748 recog_memoized (hint);
1751 /* Returns 0 if we don't want a hint for this branch. Otherwise return
1752 the rtx for the branch target. */
1754 get_branch_target (rtx branch)
1756 if (GET_CODE (branch) == JUMP_INSN)
1760 /* Return statements */
1761 if (GET_CODE (PATTERN (branch)) == RETURN)
1762 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1765 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
1766 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
1769 set = single_set (branch);
1770 src = SET_SRC (set);
1771 if (GET_CODE (SET_DEST (set)) != PC)
1774 if (GET_CODE (src) == IF_THEN_ELSE)
1777 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
1780 /* If the more probable case is not a fall through, then
1781 try a branch hint. */
1782 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
1783 if (prob > (REG_BR_PROB_BASE * 6 / 10)
1784 && GET_CODE (XEXP (src, 1)) != PC)
1785 lab = XEXP (src, 1);
1786 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
1787 && GET_CODE (XEXP (src, 2)) != PC)
1788 lab = XEXP (src, 2);
1792 if (GET_CODE (lab) == RETURN)
1793 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
1801 else if (GET_CODE (branch) == CALL_INSN)
1804 /* All of our call patterns are in a PARALLEL and the CALL is
1805 the first pattern in the PARALLEL. */
1806 if (GET_CODE (PATTERN (branch)) != PARALLEL)
1808 call = XVECEXP (PATTERN (branch), 0, 0);
1809 if (GET_CODE (call) == SET)
1810 call = SET_SRC (call);
1811 if (GET_CODE (call) != CALL)
1813 return XEXP (XEXP (call, 0), 0);
1819 insert_branch_hints (void)
1821 struct spu_bb_info *spu_bb_info;
1822 rtx branch, insn, next;
1823 rtx branch_target = 0;
1824 int branch_addr = 0, insn_addr, head_addr;
1829 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
1830 sizeof (struct spu_bb_info));
1832 /* We need exact insn addresses and lengths. */
1833 shorten_branches (get_insns ());
1835 FOR_EACH_BB_REVERSE (bb)
1837 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
1839 if (spu_bb_info[bb->index].prop_jump)
1841 branch = spu_bb_info[bb->index].prop_jump;
1842 branch_target = get_branch_target (branch);
1843 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
1845 /* Search from end of a block to beginning. In this loop, find
1846 jumps which need a branch and emit them only when:
1847 - it's an indirect branch and we're at the insn which sets
1849 - we're at an insn that will invalidate the hint. e.g., a
1850 call, another hint insn, inline asm that clobbers $hbr, and
1851 some inlined operations (divmodsi4). Don't consider jumps
1852 because they are only at the end of a block and are
1853 considered when we are deciding whether to propagate
1854 - we're getting too far away from the branch. The hbr insns
1855 only have a signed 10 bit offset
1856 We go back as far as possible so the branch will be considered
1857 for propagation when we get to the beginning of the block. */
1859 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
1863 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
1865 && ((GET_CODE (branch_target) == REG
1866 && set_of (branch_target, insn) != NULL_RTX)
1867 || insn_clobbers_hbr (insn)
1868 || branch_addr - insn_addr > 600))
1870 int next_addr = INSN_ADDRESSES (INSN_UID (next));
1871 if (insn != BB_END (bb)
1872 && branch_addr - next_addr >= spu_hint_dist)
1876 "hint for %i in block %i before %i\n",
1877 INSN_UID (branch), bb->index, INSN_UID (next));
1878 spu_emit_branch_hint (next, branch, branch_target,
1879 branch_addr - next_addr);
1884 /* JUMP_P will only be true at the end of a block. When
1885 branch is already set it means we've previously decided
1886 to propagate a hint for that branch into this block. */
1887 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
1890 if ((branch_target = get_branch_target (insn)))
1893 branch_addr = insn_addr;
1897 /* When a branch hint is emitted it will be inserted
1898 before "next". Make sure next is the beginning of a
1899 cycle to minimize impact on the scheduled insns. */
1900 if (GET_MODE (insn) == TImode)
1903 if (insn == BB_HEAD (bb))
1909 /* If we haven't emitted a hint for this branch yet, it might
1910 be profitable to emit it in one of the predecessor blocks,
1911 especially for loops. */
1913 basic_block prev = 0, prop = 0, prev2 = 0;
1914 int loop_exit = 0, simple_loop = 0;
1917 next_addr = INSN_ADDRESSES (INSN_UID (next));
1919 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
1920 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
1921 prev = EDGE_PRED (bb, j)->src;
1923 prev2 = EDGE_PRED (bb, j)->src;
1925 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
1926 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
1928 else if (EDGE_SUCC (bb, j)->dest == bb)
1931 /* If this branch is a loop exit then propagate to previous
1932 fallthru block. This catches the cases when it is a simple
1933 loop or when there is an initial branch into the loop. */
1934 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
1937 /* If there is only one adjacent predecessor. Don't propagate
1938 outside this loop. This loop_depth test isn't perfect, but
1939 I'm not sure the loop_father member is valid at this point. */
1940 else if (prev && single_pred_p (bb)
1941 && prev->loop_depth == bb->loop_depth)
1944 /* If this is the JOIN block of a simple IF-THEN then
1945 propogate the hint to the HEADER block. */
1946 else if (prev && prev2
1947 && EDGE_COUNT (bb->preds) == 2
1948 && EDGE_COUNT (prev->preds) == 1
1949 && EDGE_PRED (prev, 0)->src == prev2
1950 && prev2->loop_depth == bb->loop_depth
1951 && GET_CODE (branch_target) != REG)
1954 /* Don't propagate when:
1955 - this is a simple loop and the hint would be too far
1956 - this is not a simple loop and there are 16 insns in
1958 - the predecessor block ends in a branch that will be
1960 - the predecessor block ends in an insn that invalidates
1964 && (bbend = BB_END (prop))
1965 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
1966 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
1967 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
1970 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
1971 "for %i (loop_exit %i simple_loop %i dist %i)\n",
1972 bb->index, prop->index, bb->loop_depth,
1973 INSN_UID (branch), loop_exit, simple_loop,
1974 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
1976 spu_bb_info[prop->index].prop_jump = branch;
1977 spu_bb_info[prop->index].bb = bb;
1979 else if (next && branch_addr - next_addr >= spu_hint_dist)
1982 fprintf (dump_file, "hint for %i in block %i before %i\n",
1983 INSN_UID (branch), bb->index, INSN_UID (next));
1984 spu_emit_branch_hint (next, branch, branch_target,
1985 branch_addr - next_addr);
1993 /* Emit a nop for INSN such that the two will dual issue. This assumes
1994 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1995 We check for TImode to handle a MULTI1 insn which has dual issued its
1996 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
1999 emit_nop_for_insn (rtx insn)
2003 p = get_pipe (insn);
2004 if (p == 1 && GET_MODE (insn) == TImode)
2006 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2007 PUT_MODE (new_insn, TImode);
2008 PUT_MODE (insn, VOIDmode);
2011 new_insn = emit_insn_after (gen_lnop (), insn);
2014 /* Insert nops in basic blocks to meet dual issue alignment
2019 rtx insn, next_insn, prev_insn;
2023 /* This sets up INSN_ADDRESSES. */
2024 shorten_branches (get_insns ());
2026 /* Keep track of length added by nops. */
2030 for (insn = get_insns (); insn; insn = next_insn)
2032 next_insn = next_active_insn (insn);
2033 addr = INSN_ADDRESSES (INSN_UID (insn));
2034 if (GET_MODE (insn) == TImode
2036 && GET_MODE (next_insn) != TImode
2037 && ((addr + length) & 7) != 0)
2039 /* prev_insn will always be set because the first insn is
2040 always 8-byte aligned. */
2041 emit_nop_for_insn (prev_insn);
2049 spu_machine_dependent_reorg (void)
2053 if (TARGET_BRANCH_HINTS)
2054 insert_branch_hints ();
2060 /* Insn scheduling routines, primarily for dual issue. */
2062 spu_sched_issue_rate (void)
2068 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2069 int verbose ATTRIBUTE_UNUSED, rtx insn,
2072 if (GET_CODE (PATTERN (insn)) != USE
2073 && GET_CODE (PATTERN (insn)) != CLOBBER
2074 && get_pipe (insn) != -2)
2076 return can_issue_more;
2083 /* Handle inline asm */
2084 if (INSN_CODE (insn) == -1)
2086 t = get_attr_type (insn);
2102 case TYPE_IPREFETCH:
2119 spu_sched_adjust_priority (rtx insn, int pri)
2121 int p = get_pipe (insn);
2122 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2124 if (GET_CODE (PATTERN (insn)) == USE
2125 || GET_CODE (PATTERN (insn)) == CLOBBER
2128 /* Schedule pipe0 insns early for greedier dual issue. */
2134 /* INSN is dependent on DEP_INSN. */
2136 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2137 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2139 if (GET_CODE (insn) == CALL_INSN)
2141 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2142 scheduler makes every insn in a block anti-dependent on the final
2143 jump_insn. We adjust here so higher cost insns will get scheduled
2145 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2146 return INSN_COST (dep_insn) - 3;
2150 /* Create a CONST_DOUBLE from a string. */
2152 spu_float_const (const char *string, enum machine_mode mode)
2154 REAL_VALUE_TYPE value;
2155 value = REAL_VALUE_ATOF (string, mode);
2156 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2159 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2160 CONST_INT fits constraint 'K', i.e., is small. */
2162 legitimate_const (rtx x, int aligned)
2164 /* We can never know if the resulting address fits in 18 bits and can be
2165 loaded with ila. Instead we should use the HI and LO relocations to
2166 load a 32 bit address. */
2169 gcc_assert (GET_CODE (x) == CONST);
2171 if (GET_CODE (XEXP (x, 0)) != PLUS)
2173 sym = XEXP (XEXP (x, 0), 0);
2174 cst = XEXP (XEXP (x, 0), 1);
2175 if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
2177 if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
2179 return satisfies_constraint_K (cst);
2183 spu_constant_address_p (rtx x)
2185 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2186 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2187 || GET_CODE (x) == HIGH);
2190 static enum spu_immediate
2191 which_immediate_load (HOST_WIDE_INT val)
2193 gcc_assert (val == trunc_int_for_mode (val, SImode));
2195 if (val >= -0x8000 && val <= 0x7fff)
2197 if (val >= 0 && val <= 0x3ffff)
2199 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2201 if ((val & 0xffff) == 0)
2208 immediate_load_p (rtx op, enum machine_mode mode)
2211 unsigned char arr[16];
2213 if (GET_MODE (op) != VOIDmode)
2214 mode = GET_MODE (op);
2216 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2217 || GET_CODE (op) == CONST_VECTOR);
2219 /* V4SI with all identical symbols is valid. */
2220 if (mode == V4SImode
2221 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == SYMBOL_REF)
2222 return !TARGET_LARGE_MEM && !flag_pic
2223 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2224 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2225 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3);
2227 constant_to_array (mode, op, arr);
2229 /* Check that bytes are repeated. */
2230 for (i = 4; i < 16; i += 4)
2231 for (j = 0; j < 4; j++)
2232 if (arr[j] != arr[i + j])
2235 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2236 val = trunc_int_for_mode (val, SImode);
2238 return which_immediate_load (val) != SPU_NONE;
2241 static enum spu_immediate
2242 which_logical_immediate (HOST_WIDE_INT val)
2244 gcc_assert (val == trunc_int_for_mode (val, SImode));
2246 if (val >= -0x200 && val <= 0x1ff)
2248 if (val >= 0 && val <= 0xffff)
2250 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2252 val = trunc_int_for_mode (val, HImode);
2253 if (val >= -0x200 && val <= 0x1ff)
2255 if ((val & 0xff) == ((val >> 8) & 0xff))
2257 val = trunc_int_for_mode (val, QImode);
2258 if (val >= -0x200 && val <= 0x1ff)
2266 logical_immediate_p (rtx op, enum machine_mode mode)
2269 unsigned char arr[16];
2272 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2273 || GET_CODE (op) == CONST_VECTOR);
2275 if (GET_MODE (op) != VOIDmode)
2276 mode = GET_MODE (op);
2278 constant_to_array (mode, op, arr);
2280 /* Check that bytes are repeated. */
2281 for (i = 4; i < 16; i += 4)
2282 for (j = 0; j < 4; j++)
2283 if (arr[j] != arr[i + j])
2286 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2287 val = trunc_int_for_mode (val, SImode);
2289 i = which_logical_immediate (val);
2290 return i != SPU_NONE && i != SPU_IOHL;
2294 iohl_immediate_p (rtx op, enum machine_mode mode)
2297 unsigned char arr[16];
2300 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2301 || GET_CODE (op) == CONST_VECTOR);
2303 if (GET_MODE (op) != VOIDmode)
2304 mode = GET_MODE (op);
2306 constant_to_array (mode, op, arr);
2308 /* Check that bytes are repeated. */
2309 for (i = 4; i < 16; i += 4)
2310 for (j = 0; j < 4; j++)
2311 if (arr[j] != arr[i + j])
2314 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2315 val = trunc_int_for_mode (val, SImode);
2317 return val >= 0 && val <= 0xffff;
2321 arith_immediate_p (rtx op, enum machine_mode mode,
2322 HOST_WIDE_INT low, HOST_WIDE_INT high)
2325 unsigned char arr[16];
2328 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2329 || GET_CODE (op) == CONST_VECTOR);
2331 if (GET_MODE (op) != VOIDmode)
2332 mode = GET_MODE (op);
2334 constant_to_array (mode, op, arr);
2336 if (VECTOR_MODE_P (mode))
2337 mode = GET_MODE_INNER (mode);
2339 bytes = GET_MODE_SIZE (mode);
2340 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2342 /* Check that bytes are repeated. */
2343 for (i = bytes; i < 16; i += bytes)
2344 for (j = 0; j < bytes; j++)
2345 if (arr[j] != arr[i + j])
2349 for (j = 1; j < bytes; j++)
2350 val = (val << 8) | arr[j];
2352 val = trunc_int_for_mode (val, mode);
2354 return val >= low && val <= high;
2358 - any 32 bit constant (SImode, SFmode)
2359 - any constant that can be generated with fsmbi (any mode)
2360 - a 64 bit constant where the high and low bits are identical
2362 - a 128 bit constant where the four 32 bit words match. */
2364 spu_legitimate_constant_p (rtx x)
2366 unsigned char arr[16];
2369 if (GET_CODE (x) == HIGH
2370 || GET_CODE (x) == CONST
2371 || GET_CODE (x) == SYMBOL_REF
2372 || GET_CODE (x) == LABEL_REF)
2375 if (fsmbi_const_p (x))
2378 if (GET_CODE (x) == CONST_INT)
2379 return (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0x7fffffffll)
2380 || ((INTVAL (x) >> 32) & 0xffffffffll) == (INTVAL (x) & 0xffffffffll);
2382 if (GET_MODE (x) == SFmode)
2385 if (GET_MODE (x) == DFmode)
2387 HOST_WIDE_INT val = const_double_to_hwint (x);
2388 return ((val >> 32) & 0xffffffffll) == (val & 0xffffffffll);
2391 /* V4SI with all identical symbols is valid. */
2392 if (GET_MODE (x) == V4SImode
2393 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2394 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2395 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST
2396 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH))
2397 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2398 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2399 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2401 if (VECTOR_MODE_P (GET_MODE (x)))
2402 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2403 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2404 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2407 constant_to_array (SImode, x, arr);
2409 /* Check that bytes are repeated. */
2410 for (i = 4; i < 16; i += 4)
2411 for (j = 0; j < 4; j++)
2412 if (arr[j] != arr[i + j])
2418 /* Valid address are:
2419 - symbol_ref, label_ref, const
2421 - reg + const, where either reg or const is 16 byte aligned
2422 - reg + reg, alignment doesn't matter
2423 The alignment matters in the reg+const case because lqd and stqd
2424 ignore the 4 least significant bits of the const. (TODO: It might be
2425 preferable to allow any alignment and fix it up when splitting.) */
2427 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2428 rtx x, int reg_ok_strict)
2430 if (mode == TImode && GET_CODE (x) == AND
2431 && GET_CODE (XEXP (x, 1)) == CONST_INT
2432 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2434 switch (GET_CODE (x))
2438 return !TARGET_LARGE_MEM;
2441 return !TARGET_LARGE_MEM && legitimate_const (x, 1);
2444 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2448 gcc_assert (GET_CODE (x) == REG);
2451 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2456 rtx op0 = XEXP (x, 0);
2457 rtx op1 = XEXP (x, 1);
2458 if (GET_CODE (op0) == SUBREG)
2459 op0 = XEXP (op0, 0);
2460 if (GET_CODE (op1) == SUBREG)
2461 op1 = XEXP (op1, 0);
2462 /* We can't just accept any aligned register because CSE can
2463 change it to a register that is not marked aligned and then
2464 recog will fail. So we only accept frame registers because
2465 they will only be changed to other frame registers. */
2466 if (GET_CODE (op0) == REG
2467 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2468 && GET_CODE (op1) == CONST_INT
2469 && INTVAL (op1) >= -0x2000
2470 && INTVAL (op1) <= 0x1fff
2471 && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2473 if (GET_CODE (op0) == REG
2474 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2475 && GET_CODE (op1) == REG
2476 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2487 /* When the address is reg + const_int, force the const_int into a
2490 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2491 enum machine_mode mode)
2494 /* Make sure both operands are registers. */
2495 if (GET_CODE (x) == PLUS)
2499 if (ALIGNED_SYMBOL_REF_P (op0))
2501 op0 = force_reg (Pmode, op0);
2502 mark_reg_pointer (op0, 128);
2504 else if (GET_CODE (op0) != REG)
2505 op0 = force_reg (Pmode, op0);
2506 if (ALIGNED_SYMBOL_REF_P (op1))
2508 op1 = force_reg (Pmode, op1);
2509 mark_reg_pointer (op1, 128);
2511 else if (GET_CODE (op1) != REG)
2512 op1 = force_reg (Pmode, op1);
2513 x = gen_rtx_PLUS (Pmode, op0, op1);
2514 if (spu_legitimate_address (mode, x, 0))
2520 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2521 struct attribute_spec.handler. */
2523 spu_handle_fndecl_attribute (tree * node,
2525 tree args ATTRIBUTE_UNUSED,
2526 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2528 if (TREE_CODE (*node) != FUNCTION_DECL)
2530 warning (0, "`%s' attribute only applies to functions",
2531 IDENTIFIER_POINTER (name));
2532 *no_add_attrs = true;
2538 /* Handle the "vector" attribute. */
2540 spu_handle_vector_attribute (tree * node, tree name,
2541 tree args ATTRIBUTE_UNUSED,
2542 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2544 tree type = *node, result = NULL_TREE;
2545 enum machine_mode mode;
2548 while (POINTER_TYPE_P (type)
2549 || TREE_CODE (type) == FUNCTION_TYPE
2550 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2551 type = TREE_TYPE (type);
2553 mode = TYPE_MODE (type);
2555 unsigned_p = TYPE_UNSIGNED (type);
2559 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2562 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2565 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
2568 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
2571 result = V4SF_type_node;
2574 result = V2DF_type_node;
2580 /* Propagate qualifiers attached to the element type
2581 onto the vector type. */
2582 if (result && result != type && TYPE_QUALS (type))
2583 result = build_qualified_type (result, TYPE_QUALS (type));
2585 *no_add_attrs = true; /* No need to hang on to the attribute. */
2588 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
2590 *node = reconstruct_complex_type (*node, result);
2595 /* Return non-zero if FUNC is a naked function. */
2597 spu_naked_function_p (tree func)
2601 if (TREE_CODE (func) != FUNCTION_DECL)
2604 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
2605 return a != NULL_TREE;
2609 spu_initial_elimination_offset (int from, int to)
2611 int saved_regs_size = spu_saved_regs_size ();
2613 if (!current_function_is_leaf || current_function_outgoing_args_size
2614 || get_frame_size () || saved_regs_size)
2615 sp_offset = STACK_POINTER_OFFSET;
2616 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2617 return (sp_offset + current_function_outgoing_args_size);
2618 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2620 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
2621 return sp_offset + current_function_outgoing_args_size
2622 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
2623 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2624 return get_frame_size () + saved_regs_size + sp_offset;
2629 spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2631 enum machine_mode mode = TYPE_MODE (type);
2632 int byte_size = ((mode == BLKmode)
2633 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2635 /* Make sure small structs are left justified in a register. */
2636 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2637 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
2639 enum machine_mode smode;
2642 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2643 int n = byte_size / UNITS_PER_WORD;
2644 v = rtvec_alloc (nregs);
2645 for (i = 0; i < n; i++)
2647 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
2648 gen_rtx_REG (TImode,
2651 GEN_INT (UNITS_PER_WORD * i));
2652 byte_size -= UNITS_PER_WORD;
2660 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2662 gen_rtx_EXPR_LIST (VOIDmode,
2663 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
2664 GEN_INT (UNITS_PER_WORD * n));
2666 return gen_rtx_PARALLEL (mode, v);
2668 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
2672 spu_function_arg (CUMULATIVE_ARGS cum,
2673 enum machine_mode mode,
2674 tree type, int named ATTRIBUTE_UNUSED)
2678 if (cum >= MAX_REGISTER_ARGS)
2681 byte_size = ((mode == BLKmode)
2682 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
2684 /* The ABI does not allow parameters to be passed partially in
2685 reg and partially in stack. */
2686 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
2689 /* Make sure small structs are left justified in a register. */
2690 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
2691 && byte_size < UNITS_PER_WORD && byte_size > 0)
2693 enum machine_mode smode;
2697 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
2698 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
2699 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
2701 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
2704 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
2707 /* Variable sized types are passed by reference. */
2709 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
2710 enum machine_mode mode ATTRIBUTE_UNUSED,
2711 tree type, bool named ATTRIBUTE_UNUSED)
2713 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2719 /* Create and return the va_list datatype.
2721 On SPU, va_list is an array type equivalent to
2723 typedef struct __va_list_tag
2725 void *__args __attribute__((__aligned(16)));
2726 void *__skip __attribute__((__aligned(16)));
2730 wheare __args points to the arg that will be returned by the next
2731 va_arg(), and __skip points to the previous stack frame such that
2732 when __args == __skip we should advance __args by 32 bytes. */
2734 spu_build_builtin_va_list (void)
2736 tree f_args, f_skip, record, type_decl;
2739 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2742 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2744 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
2745 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
2747 DECL_FIELD_CONTEXT (f_args) = record;
2748 DECL_ALIGN (f_args) = 128;
2749 DECL_USER_ALIGN (f_args) = 1;
2751 DECL_FIELD_CONTEXT (f_skip) = record;
2752 DECL_ALIGN (f_skip) = 128;
2753 DECL_USER_ALIGN (f_skip) = 1;
2755 TREE_CHAIN (record) = type_decl;
2756 TYPE_NAME (record) = type_decl;
2757 TYPE_FIELDS (record) = f_args;
2758 TREE_CHAIN (f_args) = f_skip;
2760 /* We know this is being padded and we want it too. It is an internal
2761 type so hide the warnings from the user. */
2763 warn_padded = false;
2765 layout_type (record);
2769 /* The correct type is an array type of one element. */
2770 return build_array_type (record, build_index_type (size_zero_node));
2773 /* Implement va_start by filling the va_list structure VALIST.
2774 NEXTARG points to the first anonymous stack argument.
2776 The following global variables are used to initialize
2777 the va_list structure:
2779 current_function_args_info;
2780 the CUMULATIVE_ARGS for this function
2782 current_function_arg_offset_rtx:
2783 holds the offset of the first anonymous stack argument
2784 (relative to the virtual arg pointer). */
2787 spu_va_start (tree valist, rtx nextarg)
2789 tree f_args, f_skip;
2792 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2793 f_skip = TREE_CHAIN (f_args);
2795 valist = build_va_arg_indirect_ref (valist);
2797 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2799 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2801 /* Find the __args area. */
2802 t = make_tree (TREE_TYPE (args), nextarg);
2803 if (current_function_pretend_args_size > 0)
2804 t = build2 (PLUS_EXPR, TREE_TYPE (args), t,
2805 build_int_cst (integer_type_node, -STACK_POINTER_OFFSET));
2806 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
2807 TREE_SIDE_EFFECTS (t) = 1;
2808 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2810 /* Find the __skip area. */
2811 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2812 t = build2 (PLUS_EXPR, TREE_TYPE (skip), t,
2813 build_int_cst (integer_type_node,
2814 (current_function_pretend_args_size
2815 - STACK_POINTER_OFFSET)));
2816 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
2817 TREE_SIDE_EFFECTS (t) = 1;
2818 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2821 /* Gimplify va_arg by updating the va_list structure
2822 VALIST as required to retrieve an argument of type
2823 TYPE, and returning that argument.
2825 ret = va_arg(VALIST, TYPE);
2827 generates code equivalent to:
2829 paddedsize = (sizeof(TYPE) + 15) & -16;
2830 if (VALIST.__args + paddedsize > VALIST.__skip
2831 && VALIST.__args <= VALIST.__skip)
2832 addr = VALIST.__skip + 32;
2834 addr = VALIST.__args;
2835 VALIST.__args = addr + paddedsize;
2836 ret = *(TYPE *)addr;
2839 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
2840 tree * post_p ATTRIBUTE_UNUSED)
2842 tree f_args, f_skip;
2844 HOST_WIDE_INT size, rsize;
2845 tree paddedsize, addr, tmp;
2846 bool pass_by_reference_p;
2848 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2849 f_skip = TREE_CHAIN (f_args);
2851 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2853 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
2855 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
2857 addr = create_tmp_var (ptr_type_node, "va_arg");
2858 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
2860 /* if an object is dynamically sized, a pointer to it is passed
2861 instead of the object itself. */
2862 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
2864 if (pass_by_reference_p)
2865 type = build_pointer_type (type);
2866 size = int_size_in_bytes (type);
2867 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
2869 /* build conditional expression to calculate addr. The expression
2870 will be gimplified later. */
2871 paddedsize = fold_convert (ptr_type_node, size_int (rsize));
2872 tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize);
2873 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
2874 build2 (GT_EXPR, boolean_type_node, tmp, skip),
2875 build2 (LE_EXPR, boolean_type_node, args, skip));
2877 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2878 build2 (PLUS_EXPR, ptr_type_node, skip,
2879 fold_convert (ptr_type_node, size_int (32))), args);
2881 tmp = build2 (MODIFY_EXPR, ptr_type_node, addr, tmp);
2882 gimplify_and_add (tmp, pre_p);
2884 /* update VALIST.__args */
2885 tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize);
2886 tmp = build2 (MODIFY_EXPR, TREE_TYPE (args), args, tmp);
2887 gimplify_and_add (tmp, pre_p);
2889 addr = fold_convert (build_pointer_type (type), addr);
2891 if (pass_by_reference_p)
2892 addr = build_va_arg_indirect_ref (addr);
2894 return build_va_arg_indirect_ref (addr);
2897 /* Save parameter registers starting with the register that corresponds
2898 to the first unnamed parameters. If the first unnamed parameter is
2899 in the stack then save no registers. Set pretend_args_size to the
2900 amount of space needed to save the registers. */
2902 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
2903 tree type, int *pretend_size, int no_rtl)
2912 /* cum currently points to the last named argument, we want to
2913 start at the next argument. */
2914 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
2916 offset = -STACK_POINTER_OFFSET;
2917 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
2919 tmp = gen_frame_mem (V4SImode,
2920 plus_constant (virtual_incoming_args_rtx,
2922 emit_move_insn (tmp,
2923 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
2926 *pretend_size = offset + STACK_POINTER_OFFSET;
2931 spu_conditional_register_usage (void)
2935 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2936 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
2938 global_regs[INTR_REGNUM] = 1;
2941 /* This is called to decide when we can simplify a load instruction. We
2942 must only return true for registers which we know will always be
2943 aligned. Taking into account that CSE might replace this reg with
2944 another one that has not been marked aligned.
2945 So this is really only true for frame, stack and virtual registers,
2946 which we know are always aligned and should not be adversly effected
2949 regno_aligned_for_load (int regno)
2951 return regno == FRAME_POINTER_REGNUM
2952 || regno == HARD_FRAME_POINTER_REGNUM
2953 || regno == STACK_POINTER_REGNUM
2954 || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER);
2957 /* Return TRUE when mem is known to be 16-byte aligned. */
2959 aligned_mem_p (rtx mem)
2961 if (MEM_ALIGN (mem) >= 128)
2963 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
2965 if (GET_CODE (XEXP (mem, 0)) == PLUS)
2967 rtx p0 = XEXP (XEXP (mem, 0), 0);
2968 rtx p1 = XEXP (XEXP (mem, 0), 1);
2969 if (regno_aligned_for_load (REGNO (p0)))
2971 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
2973 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2977 else if (GET_CODE (XEXP (mem, 0)) == REG)
2979 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
2982 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
2984 else if (GET_CODE (XEXP (mem, 0)) == CONST)
2986 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
2987 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
2988 if (GET_CODE (p0) == SYMBOL_REF
2989 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
2995 /* Return TRUE if we are certain the mem refers to a complete object
2996 which is both 16-byte aligned and padded to a 16-byte boundary. This
2997 would make it safe to store with a single instruction.
2998 We guarantee the alignment and padding for static objects by aligning
2999 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3000 FIXME: We currently cannot guarantee this for objects on the stack
3001 because assign_parm_setup_stack calls assign_stack_local with the
3002 alignment of the parameter mode and in that case the alignment never
3003 gets adjusted by LOCAL_ALIGNMENT. */
3005 store_with_one_insn_p (rtx mem)
3007 rtx addr = XEXP (mem, 0);
3008 if (GET_MODE (mem) == BLKmode)
3010 /* Only static objects. */
3011 if (GET_CODE (addr) == SYMBOL_REF)
3013 /* We use the associated declaration to make sure the access is
3014 refering to the whole object.
3015 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
3016 if it is necessary. Will there be cases where one exists, and
3017 the other does not? Will there be cases where both exist, but
3018 have different types? */
3019 tree decl = MEM_EXPR (mem);
3021 && TREE_CODE (decl) == VAR_DECL
3022 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3024 decl = SYMBOL_REF_DECL (addr);
3026 && TREE_CODE (decl) == VAR_DECL
3027 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3034 spu_expand_mov (rtx * ops, enum machine_mode mode)
3036 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3039 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3041 rtx from = SUBREG_REG (ops[1]);
3042 enum machine_mode imode = GET_MODE (from);
3044 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3045 && GET_MODE_CLASS (imode) == MODE_INT
3046 && subreg_lowpart_p (ops[1]));
3048 if (GET_MODE_SIZE (imode) < 4)
3050 from = gen_rtx_SUBREG (SImode, from, 0);
3054 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3056 enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
3057 emit_insn (GEN_FCN (icode) (ops[0], from));
3060 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3064 /* At least one of the operands needs to be a register. */
3065 if ((reload_in_progress | reload_completed) == 0
3066 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3068 rtx temp = force_reg (mode, ops[1]);
3069 emit_move_insn (ops[0], temp);
3072 if (reload_in_progress || reload_completed)
3074 enum machine_mode mode = GET_MODE (ops[0]);
3075 if (GET_CODE (ops[1]) == CONST_INT
3076 && (mode == DImode || mode == TImode)
3077 && ((INTVAL (ops[1]) >> 32) & 0xffffffffll) !=
3078 (INTVAL (ops[1]) & 0xffffffffll))
3080 rtx mem = force_const_mem (mode, ops[1]);
3081 if (TARGET_LARGE_MEM)
3083 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
3084 emit_move_insn (addr, XEXP (mem, 0));
3085 mem = replace_equiv_address (mem, addr);
3087 emit_move_insn (ops[0], mem);
3090 else if ((GET_CODE (ops[1]) == CONST_INT
3091 || GET_CODE (ops[1]) == CONST_DOUBLE
3092 || GET_CODE (ops[1]) == CONST_VECTOR)
3093 && !immediate_load_p (ops[1], mode)
3094 && !fsmbi_const_p (ops[1]))
3096 unsigned char arrlo[16];
3097 unsigned char arrhi[16];
3098 rtx to = ops[0], hi, lo;
3100 constant_to_array (mode, ops[1], arrhi);
3101 for (i = 0; i < 16; i += 4)
3103 arrlo[i + 2] = arrhi[i + 2];
3104 arrlo[i + 3] = arrhi[i + 3];
3105 arrlo[i + 0] = arrlo[i + 1] = 0;
3106 arrhi[i + 2] = arrhi[i + 3] = 0;
3110 to = spu_gen_subreg (SImode, ops[0]);
3113 else if (mode == V4SFmode)
3115 to = spu_gen_subreg (V4SImode, ops[0]);
3118 hi = array_to_constant (mode, arrhi);
3119 lo = array_to_constant (mode, arrlo);
3120 emit_move_insn (to, hi);
3121 emit_insn (gen_rtx_SET (VOIDmode, to, gen_rtx_IOR (mode, to, lo)));
3124 if ((GET_CODE (ops[1]) == CONST
3125 && !legitimate_const (ops[1], 0))
3126 || (TARGET_LARGE_MEM
3127 && (GET_CODE (ops[1]) == CONST
3128 || GET_CODE (ops[1]) == SYMBOL_REF
3129 || GET_CODE (ops[1]) == LABEL_REF)))
3131 emit_insn (gen_high (ops[0], ops[1]));
3132 emit_insn (gen_low (ops[0], ops[0], ops[1]));
3135 rtx pic_reg = get_pic_reg ();
3136 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
3137 current_function_uses_pic_offset_table = 1;
3142 && (GET_CODE (ops[1]) == SYMBOL_REF
3143 || GET_CODE (ops[1]) == LABEL_REF
3144 || GET_CODE (ops[1]) == CONST))
3146 rtx pic_reg = get_pic_reg ();
3147 emit_insn (gen_pic (ops[0], ops[1]));
3148 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
3149 current_function_uses_pic_offset_table = 1;
3156 if (GET_CODE (ops[0]) == MEM)
3158 if (!spu_valid_move (ops))
3160 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3161 gen_reg_rtx (TImode)));
3165 else if (GET_CODE (ops[1]) == MEM)
3167 if (!spu_valid_move (ops))
3170 (ops[0], ops[1], gen_reg_rtx (TImode),
3171 gen_reg_rtx (SImode)));
3175 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3177 if (GET_CODE (ops[1]) == CONST_INT)
3179 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3180 if (val != INTVAL (ops[1]))
3182 emit_move_insn (ops[0], GEN_INT (val));
3193 /* For now, only frame registers are known to be aligned at all times.
3194 We can't trust REGNO_POINTER_ALIGN because optimization will move
3195 registers around, potentially changing an "aligned" register in an
3196 address to an unaligned register, which would result in an invalid
3198 int regno = REGNO (reg);
3199 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3203 spu_split_load (rtx * ops)
3205 enum machine_mode mode = GET_MODE (ops[0]);
3206 rtx addr, load, rot, mem, p0, p1;
3209 addr = XEXP (ops[1], 0);
3213 if (GET_CODE (addr) == PLUS)
3216 aligned reg + aligned reg => lqx
3217 aligned reg + unaligned reg => lqx, rotqby
3218 aligned reg + aligned const => lqd
3219 aligned reg + unaligned const => lqd, rotqbyi
3220 unaligned reg + aligned reg => lqx, rotqby
3221 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3222 unaligned reg + aligned const => lqd, rotqby
3223 unaligned reg + unaligned const -> not allowed by legitimate address
3225 p0 = XEXP (addr, 0);
3226 p1 = XEXP (addr, 1);
3227 if (reg_align (p0) < 128)
3229 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3231 emit_insn (gen_addsi3 (ops[3], p0, p1));
3239 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3241 rot_amt = INTVAL (p1) & 15;
3242 p1 = GEN_INT (INTVAL (p1) & -16);
3243 addr = gen_rtx_PLUS (SImode, p0, p1);
3245 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3249 else if (GET_CODE (addr) == REG)
3251 if (reg_align (addr) < 128)
3254 else if (GET_CODE (addr) == CONST)
3256 if (GET_CODE (XEXP (addr, 0)) == PLUS
3257 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3258 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3260 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3262 addr = gen_rtx_CONST (Pmode,
3263 gen_rtx_PLUS (Pmode,
3264 XEXP (XEXP (addr, 0), 0),
3265 GEN_INT (rot_amt & -16)));
3267 addr = XEXP (XEXP (addr, 0), 0);
3272 else if (GET_CODE (addr) == CONST_INT)
3274 rot_amt = INTVAL (addr);
3275 addr = GEN_INT (rot_amt & -16);
3277 else if (!ALIGNED_SYMBOL_REF_P (addr))
3280 if (GET_MODE_SIZE (mode) < 4)
3281 rot_amt += GET_MODE_SIZE (mode) - 4;
3287 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3294 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3295 mem = change_address (ops[1], TImode, addr);
3297 emit_insn (gen_lq_ti (load, mem));
3300 emit_insn (gen_rotqby_ti (load, load, rot));
3302 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3304 if (reload_completed)
3305 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3307 emit_insn (gen_spu_convert (ops[0], load));
3311 spu_split_store (rtx * ops)
3313 enum machine_mode mode = GET_MODE (ops[0]);
3316 rtx addr, p0, p1, p1_lo, smem;
3320 addr = XEXP (ops[0], 0);
3322 if (GET_CODE (addr) == PLUS)
3325 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3326 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3327 aligned reg + aligned const => lqd, c?d, shuf, stqx
3328 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3329 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3330 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3331 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3332 unaligned reg + unaligned const -> not allowed by legitimate address
3335 p0 = XEXP (addr, 0);
3336 p1 = p1_lo = XEXP (addr, 1);
3337 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3339 p1_lo = GEN_INT (INTVAL (p1) & 15);
3340 p1 = GEN_INT (INTVAL (p1) & -16);
3341 addr = gen_rtx_PLUS (SImode, p0, p1);
3344 else if (GET_CODE (addr) == REG)
3348 p1 = p1_lo = const0_rtx;
3353 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3354 p1 = 0; /* aform doesn't use p1 */
3356 if (ALIGNED_SYMBOL_REF_P (addr))
3358 else if (GET_CODE (addr) == CONST)
3360 if (GET_CODE (XEXP (addr, 0)) == PLUS
3361 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3362 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3364 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3366 addr = gen_rtx_CONST (Pmode,
3367 gen_rtx_PLUS (Pmode,
3368 XEXP (XEXP (addr, 0), 0),
3369 GEN_INT (v & -16)));
3371 addr = XEXP (XEXP (addr, 0), 0);
3372 p1_lo = GEN_INT (v & 15);
3375 else if (GET_CODE (addr) == CONST_INT)
3377 p1_lo = GEN_INT (INTVAL (addr) & 15);
3378 addr = GEN_INT (INTVAL (addr) & -16);
3382 scalar = store_with_one_insn_p (ops[0]);
3385 /* We could copy the flags from the ops[0] MEM to mem here,
3386 We don't because we want this load to be optimized away if
3387 possible, and copying the flags will prevent that in certain
3388 cases, e.g. consider the volatile flag. */
3390 emit_insn (gen_lq (reg, copy_rtx (addr)));
3392 if (!p0 || reg_align (p0) >= 128)
3393 p0 = stack_pointer_rtx;
3397 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3398 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3400 else if (reload_completed)
3402 if (GET_CODE (ops[1]) == REG)
3403 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3404 else if (GET_CODE (ops[1]) == SUBREG)
3405 emit_move_insn (reg,
3406 gen_rtx_REG (GET_MODE (reg),
3407 REGNO (SUBREG_REG (ops[1]))));
3413 if (GET_CODE (ops[1]) == REG)
3414 emit_insn (gen_spu_convert (reg, ops[1]));
3415 else if (GET_CODE (ops[1]) == SUBREG)
3416 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3421 if (GET_MODE_SIZE (mode) < 4 && scalar)
3422 emit_insn (gen_shlqby_ti
3423 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3425 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3426 smem = change_address (ops[0], TImode, addr);
3427 /* We can't use the previous alias set because the memory has changed
3428 size and can potentially overlap objects of other types. */
3429 set_mem_alias_set (smem, 0);
3431 emit_insn (gen_stq_ti (smem, reg));
3434 /* Return TRUE if X is MEM which is a struct member reference
3435 and the member can safely be loaded and stored with a single
3436 instruction because it is padded. */
3438 mem_is_padded_component_ref (rtx x)
3440 tree t = MEM_EXPR (x);
3442 if (!t || TREE_CODE (t) != COMPONENT_REF)
3444 t = TREE_OPERAND (t, 1);
3445 if (!t || TREE_CODE (t) != FIELD_DECL
3446 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3448 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3449 r = DECL_FIELD_CONTEXT (t);
3450 if (!r || TREE_CODE (r) != RECORD_TYPE)
3452 /* Make sure they are the same mode */
3453 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3455 /* If there are no following fields then the field alignment assures
3456 the structure is padded to the alignement which means this field is
3458 if (TREE_CHAIN (t) == 0)
3460 /* If the following field is also aligned then this field will be
3463 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3469 spu_valid_move (rtx * ops)
3471 enum machine_mode mode = GET_MODE (ops[0]);
3472 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3475 /* init_expr_once tries to recog against load and store insns to set
3476 the direct_load[] and direct_store[] arrays. We always want to
3477 consider those loads and stores valid. init_expr_once is called in
3478 the context of a dummy function which does not have a decl. */
3479 if (cfun->decl == 0)
3482 /* Don't allows loads/stores which would require more than 1 insn.
3483 During and after reload we assume loads and stores only take 1
3485 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3487 if (GET_CODE (ops[0]) == MEM
3488 && (GET_MODE_SIZE (mode) < 4
3489 || !(store_with_one_insn_p (ops[0])
3490 || mem_is_padded_component_ref (ops[0]))))
3492 if (GET_CODE (ops[1]) == MEM
3493 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3499 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3500 can be generated using the fsmbi instruction. */
3502 fsmbi_const_p (rtx x)
3504 enum machine_mode mode;
3505 unsigned char arr[16];
3508 /* We can always choose DImode for CONST_INT because the high bits of
3509 an SImode will always be all 1s, i.e., valid for fsmbi. */
3510 mode = GET_CODE (x) == CONST_INT ? DImode : GET_MODE (x);
3511 constant_to_array (mode, x, arr);
3513 for (i = 0; i < 16; i++)
3514 if (arr[i] != 0 && arr[i] != 0xff)
3519 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3520 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3521 than 16 bytes, the value is repeated across the rest of the array. */
3523 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3528 memset (arr, 0, 16);
3529 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3530 if (GET_CODE (x) == CONST_INT
3531 || (GET_CODE (x) == CONST_DOUBLE
3532 && (mode == SFmode || mode == DFmode)))
3534 gcc_assert (mode != VOIDmode && mode != BLKmode);
3536 if (GET_CODE (x) == CONST_DOUBLE)
3537 val = const_double_to_hwint (x);
3540 first = GET_MODE_SIZE (mode) - 1;
3541 for (i = first; i >= 0; i--)
3543 arr[i] = val & 0xff;
3546 /* Splat the constant across the whole array. */
3547 for (j = 0, i = first + 1; i < 16; i++)
3550 j = (j == first) ? 0 : j + 1;
3553 else if (GET_CODE (x) == CONST_DOUBLE)
3555 val = CONST_DOUBLE_LOW (x);
3556 for (i = 15; i >= 8; i--)
3558 arr[i] = val & 0xff;
3561 val = CONST_DOUBLE_HIGH (x);
3562 for (i = 7; i >= 0; i--)
3564 arr[i] = val & 0xff;
3568 else if (GET_CODE (x) == CONST_VECTOR)
3572 mode = GET_MODE_INNER (mode);
3573 units = CONST_VECTOR_NUNITS (x);
3574 for (i = 0; i < units; i++)
3576 elt = CONST_VECTOR_ELT (x, i);
3577 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
3579 if (GET_CODE (elt) == CONST_DOUBLE)
3580 val = const_double_to_hwint (elt);
3583 first = GET_MODE_SIZE (mode) - 1;
3584 if (first + i * GET_MODE_SIZE (mode) > 16)
3586 for (j = first; j >= 0; j--)
3588 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
3598 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
3599 smaller than 16 bytes, use the bytes that would represent that value
3600 in a register, e.g., for QImode return the value of arr[3]. */
3602 array_to_constant (enum machine_mode mode, unsigned char arr[16])
3604 enum machine_mode inner_mode;
3606 int units, size, i, j, k;
3609 if (GET_MODE_CLASS (mode) == MODE_INT
3610 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3612 j = GET_MODE_SIZE (mode);
3613 i = j < 4 ? 4 - j : 0;
3614 for (val = 0; i < j; i++)
3615 val = (val << 8) | arr[i];
3616 val = trunc_int_for_mode (val, mode);
3617 return GEN_INT (val);
3623 for (i = high = 0; i < 8; i++)
3624 high = (high << 8) | arr[i];
3625 for (i = 8, val = 0; i < 16; i++)
3626 val = (val << 8) | arr[i];
3627 return immed_double_const (val, high, TImode);
3631 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3632 val = trunc_int_for_mode (val, SImode);
3633 return hwint_to_const_double (val, SFmode);
3637 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3639 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
3640 return hwint_to_const_double (val, DFmode);
3643 if (!VECTOR_MODE_P (mode))
3646 units = GET_MODE_NUNITS (mode);
3647 size = GET_MODE_UNIT_SIZE (mode);
3648 inner_mode = GET_MODE_INNER (mode);
3649 v = rtvec_alloc (units);
3651 for (k = i = 0; i < units; ++i)
3654 for (j = 0; j < size; j++, k++)
3655 val = (val << 8) | arr[k];
3657 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
3658 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
3660 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
3665 return gen_rtx_CONST_VECTOR (mode, v);
3669 reloc_diagnostic (rtx x)
3671 tree loc_decl, decl = 0;
3673 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
3676 if (GET_CODE (x) == SYMBOL_REF)
3677 decl = SYMBOL_REF_DECL (x);
3678 else if (GET_CODE (x) == CONST
3679 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3680 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
3682 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3683 if (decl && !DECL_P (decl))
3686 /* We use last_assemble_variable_decl to get line information. It's
3687 not always going to be right and might not even be close, but will
3688 be right for the more common cases. */
3689 if (!last_assemble_variable_decl)
3692 loc_decl = last_assemble_variable_decl;
3694 /* The decl could be a string constant. */
3695 if (decl && DECL_P (decl))
3696 msg = "%Jcreating run-time relocation for %qD";
3698 msg = "creating run-time relocation";
3700 if (TARGET_ERROR_RELOC) /** default : error reloc **/
3701 error (msg, loc_decl, decl);
3703 warning (0, msg, loc_decl, decl);
3706 /* Hook into assemble_integer so we can generate an error for run-time
3707 relocations. The SPU ABI disallows them. */
3709 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
3711 /* By default run-time relocations aren't supported, but we allow them
3712 in case users support it in their own run-time loader. And we provide
3713 a warning for those users that don't. */
3714 if ((GET_CODE (x) == SYMBOL_REF)
3715 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
3716 reloc_diagnostic (x);
3718 return default_assemble_integer (x, size, aligned_p);
3722 spu_asm_globalize_label (FILE * file, const char *name)
3724 fputs ("\t.global\t", file);
3725 assemble_name (file, name);
3730 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
3732 enum machine_mode mode = GET_MODE (x);
3733 int cost = COSTS_N_INSNS (2);
3735 /* Folding to a CONST_VECTOR will use extra space but there might
3736 be only a small savings in cycles. We'd like to use a CONST_VECTOR
3737 only if it allows us to fold away multiple insns. Changin the cost
3738 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3739 because this cost will only be compared against a single insn.
3740 if (code == CONST_VECTOR)
3741 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3744 /* Use defaults for float operations. Not accurate but good enough. */
3747 *total = COSTS_N_INSNS (13);
3752 *total = COSTS_N_INSNS (6);
3758 if (satisfies_constraint_K (x))
3760 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
3761 *total = COSTS_N_INSNS (1);
3763 *total = COSTS_N_INSNS (3);
3767 *total = COSTS_N_INSNS (3);
3772 *total = COSTS_N_INSNS (0);
3776 *total = COSTS_N_INSNS (5);
3780 case FLOAT_TRUNCATE:
3782 case UNSIGNED_FLOAT:
3785 *total = COSTS_N_INSNS (7);
3791 *total = COSTS_N_INSNS (9);
3798 GET_CODE (XEXP (x, 0)) ==
3799 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
3800 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
3802 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3804 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3805 cost = COSTS_N_INSNS (14);
3806 if ((val & 0xffff) == 0)
3807 cost = COSTS_N_INSNS (9);
3808 else if (val > 0 && val < 0x10000)
3809 cost = COSTS_N_INSNS (11);
3818 *total = COSTS_N_INSNS (20);
3825 *total = COSTS_N_INSNS (4);
3828 if (XINT (x, 1) == UNSPEC_CONVERT)
3829 *total = COSTS_N_INSNS (0);
3831 *total = COSTS_N_INSNS (4);
3834 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
3835 if (GET_MODE_CLASS (mode) == MODE_INT
3836 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
3837 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
3838 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
3844 spu_eh_return_filter_mode (void)
3846 /* We would like this to be SImode, but sjlj exceptions seems to work
3847 only with word_mode. */
3851 /* Decide whether we can make a sibling call to a function. DECL is the
3852 declaration of the function being targeted by the call and EXP is the
3853 CALL_EXPR representing the call. */
3855 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3857 return decl && !TARGET_LARGE_MEM;
3860 /* We need to correctly update the back chain pointer and the Available
3861 Stack Size (which is in the second slot of the sp register.) */
3863 spu_allocate_stack (rtx op0, rtx op1)
3866 rtx chain = gen_reg_rtx (V4SImode);
3867 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
3868 rtx sp = gen_reg_rtx (V4SImode);
3869 rtx splatted = gen_reg_rtx (V4SImode);
3870 rtx pat = gen_reg_rtx (TImode);
3872 /* copy the back chain so we can save it back again. */
3873 emit_move_insn (chain, stack_bot);
3875 op1 = force_reg (SImode, op1);
3877 v = 0x1020300010203ll;
3878 emit_move_insn (pat, immed_double_const (v, v, TImode));
3879 emit_insn (gen_shufb (splatted, op1, op1, pat));
3881 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
3882 emit_insn (gen_subv4si3 (sp, sp, splatted));
3884 if (flag_stack_check)
3886 rtx avail = gen_reg_rtx(SImode);
3887 rtx result = gen_reg_rtx(SImode);
3888 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
3889 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
3890 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
3893 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
3895 emit_move_insn (stack_bot, chain);
3897 emit_move_insn (op0, virtual_stack_dynamic_rtx);
3901 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
3903 static unsigned char arr[16] =
3904 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
3905 rtx temp = gen_reg_rtx (SImode);
3906 rtx temp2 = gen_reg_rtx (SImode);
3907 rtx temp3 = gen_reg_rtx (V4SImode);
3908 rtx temp4 = gen_reg_rtx (V4SImode);
3909 rtx pat = gen_reg_rtx (TImode);
3910 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
3912 /* Restore the backchain from the first word, sp from the second. */
3913 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
3914 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
3916 emit_move_insn (pat, array_to_constant (TImode, arr));
3918 /* Compute Available Stack Size for sp */
3919 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
3920 emit_insn (gen_shufb (temp3, temp, temp, pat));
3922 /* Compute Available Stack Size for back chain */
3923 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
3924 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
3925 emit_insn (gen_addv4si3 (temp4, sp, temp4));
3927 emit_insn (gen_addv4si3 (sp, sp, temp3));
3928 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
3932 spu_init_libfuncs (void)
3934 set_optab_libfunc (smul_optab, DImode, "__muldi3");
3935 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
3936 set_optab_libfunc (smod_optab, DImode, "__moddi3");
3937 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
3938 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
3939 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
3940 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
3941 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
3942 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
3943 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
3944 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
3946 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
3947 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
3950 /* Make a subreg, stripping any existing subreg. We could possibly just
3951 call simplify_subreg, but in this case we know what we want. */
3953 spu_gen_subreg (enum machine_mode mode, rtx x)
3955 if (GET_CODE (x) == SUBREG)
3957 if (GET_MODE (x) == mode)
3959 return gen_rtx_SUBREG (mode, x, 0);
3963 spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
3965 return (TYPE_MODE (type) == BLKmode
3967 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3968 || int_size_in_bytes (type) >
3969 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
3972 /* Create the built-in types and functions */
3974 struct spu_builtin_description spu_builtins[] = {
3975 #define DEF_BUILTIN(fcode, icode, name, type, params) \
3976 {fcode, icode, name, type, params, NULL_TREE},
3977 #include "spu-builtins.def"
3982 spu_init_builtins (void)
3984 struct spu_builtin_description *d;
3987 V16QI_type_node = build_vector_type (intQI_type_node, 16);
3988 V8HI_type_node = build_vector_type (intHI_type_node, 8);
3989 V4SI_type_node = build_vector_type (intSI_type_node, 4);
3990 V2DI_type_node = build_vector_type (intDI_type_node, 2);
3991 V4SF_type_node = build_vector_type (float_type_node, 4);
3992 V2DF_type_node = build_vector_type (double_type_node, 2);
3994 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
3995 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
3996 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
3997 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
3999 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4001 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4002 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4003 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4004 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4005 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4006 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4007 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4008 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4009 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4010 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4011 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4012 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4014 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4015 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4016 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4017 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4018 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4019 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4020 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4021 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4023 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4024 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4026 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4028 spu_builtin_types[SPU_BTI_PTR] =
4029 build_pointer_type (build_qualified_type
4031 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4033 /* For each builtin we build a new prototype. The tree code will make
4034 sure nodes are shared. */
4035 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4038 char name[64]; /* build_function will make a copy. */
4044 /* find last parm */
4045 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4051 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4053 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4055 sprintf (name, "__builtin_%s", d->name);
4057 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4063 spu_safe_dma (HOST_WIDE_INT channel)
4065 return (channel >= 21 && channel <= 27);
4069 spu_builtin_splats (rtx ops[])
4071 enum machine_mode mode = GET_MODE (ops[0]);
4072 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4074 unsigned char arr[16];
4075 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4076 emit_move_insn (ops[0], array_to_constant (mode, arr));
4078 else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4080 rtvec v = rtvec_alloc (4);
4081 RTVEC_ELT (v, 0) = ops[1];
4082 RTVEC_ELT (v, 1) = ops[1];
4083 RTVEC_ELT (v, 2) = ops[1];
4084 RTVEC_ELT (v, 3) = ops[1];
4085 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4089 rtx reg = gen_reg_rtx (TImode);
4091 if (GET_CODE (ops[1]) != REG
4092 && GET_CODE (ops[1]) != SUBREG)
4093 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
4099 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4105 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4110 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4115 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4121 emit_move_insn (reg, shuf);
4122 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4127 spu_builtin_extract (rtx ops[])
4129 enum machine_mode mode;
4132 mode = GET_MODE (ops[1]);
4134 if (GET_CODE (ops[2]) == CONST_INT)
4139 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4142 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4145 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4148 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4151 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4154 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4162 from = spu_gen_subreg (TImode, ops[1]);
4163 rot = gen_reg_rtx (TImode);
4164 tmp = gen_reg_rtx (SImode);
4169 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4172 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4173 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4177 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4181 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4186 emit_insn (gen_rotqby_ti (rot, from, tmp));
4188 emit_insn (gen_spu_convert (ops[0], rot));
4192 spu_builtin_insert (rtx ops[])
4194 enum machine_mode mode = GET_MODE (ops[0]);
4195 enum machine_mode imode = GET_MODE_INNER (mode);
4196 rtx mask = gen_reg_rtx (TImode);
4199 if (GET_CODE (ops[3]) == CONST_INT)
4200 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4203 offset = gen_reg_rtx (SImode);
4204 emit_insn (gen_mulsi3
4205 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4208 (mask, stack_pointer_rtx, offset,
4209 GEN_INT (GET_MODE_SIZE (imode))));
4210 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4214 spu_builtin_promote (rtx ops[])
4216 enum machine_mode mode, imode;
4217 rtx rot, from, offset;
4220 mode = GET_MODE (ops[0]);
4221 imode = GET_MODE_INNER (mode);
4223 from = gen_reg_rtx (TImode);
4224 rot = spu_gen_subreg (TImode, ops[0]);
4226 emit_insn (gen_spu_convert (from, ops[1]));
4228 if (GET_CODE (ops[2]) == CONST_INT)
4230 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4231 if (GET_MODE_SIZE (imode) < 4)
4232 pos += 4 - GET_MODE_SIZE (imode);
4233 offset = GEN_INT (pos & 15);
4237 offset = gen_reg_rtx (SImode);
4241 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4244 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4245 emit_insn (gen_addsi3 (offset, offset, offset));
4249 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4250 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4254 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4260 emit_insn (gen_rotqby_ti (rot, from, offset));
4264 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4266 rtx shuf = gen_reg_rtx (V4SImode);
4267 rtx insn = gen_reg_rtx (V4SImode);
4272 fnaddr = force_reg (SImode, fnaddr);
4273 cxt = force_reg (SImode, cxt);
4275 if (TARGET_LARGE_MEM)
4277 rtx rotl = gen_reg_rtx (V4SImode);
4278 rtx mask = gen_reg_rtx (V4SImode);
4279 rtx bi = gen_reg_rtx (SImode);
4280 unsigned char shufa[16] = {
4281 2, 3, 0, 1, 18, 19, 16, 17,
4282 0, 1, 2, 3, 16, 17, 18, 19
4284 unsigned char insna[16] = {
4286 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4288 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4291 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4292 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4294 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4295 emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4296 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4297 emit_insn (gen_selb (insn, insnc, rotl, mask));
4299 mem = memory_address (Pmode, tramp);
4300 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4302 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4303 mem = memory_address (Pmode, plus_constant (tramp, 16));
4304 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4308 rtx scxt = gen_reg_rtx (SImode);
4309 rtx sfnaddr = gen_reg_rtx (SImode);
4310 unsigned char insna[16] = {
4311 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4317 shufc = gen_reg_rtx (TImode);
4318 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4320 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4321 fits 18 bits and the last 4 are zeros. This will be true if
4322 the stack pointer is initialized to 0x3fff0 at program start,
4323 otherwise the ila instruction will be garbage. */
4325 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4326 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4328 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4329 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4330 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4332 mem = memory_address (Pmode, tramp);
4333 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4336 emit_insn (gen_sync ());
4340 spu_expand_sign_extend (rtx ops[])
4342 unsigned char arr[16];
4343 rtx pat = gen_reg_rtx (TImode);
4346 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4347 if (GET_MODE (ops[1]) == QImode)
4349 sign = gen_reg_rtx (HImode);
4350 emit_insn (gen_extendqihi2 (sign, ops[1]));
4351 for (i = 0; i < 16; i++)
4357 for (i = 0; i < 16; i++)
4359 switch (GET_MODE (ops[1]))
4362 sign = gen_reg_rtx (SImode);
4363 emit_insn (gen_extendhisi2 (sign, ops[1]));
4365 arr[last - 1] = 0x02;
4368 sign = gen_reg_rtx (SImode);
4369 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4370 for (i = 0; i < 4; i++)
4371 arr[last - i] = 3 - i;
4374 sign = gen_reg_rtx (SImode);
4375 c = gen_reg_rtx (SImode);
4376 emit_insn (gen_spu_convert (c, ops[1]));
4377 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4378 for (i = 0; i < 8; i++)
4379 arr[last - i] = 7 - i;
4385 emit_move_insn (pat, array_to_constant (TImode, arr));
4386 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4389 /* expand vector initialization. If there are any constant parts,
4390 load constant parts first. Then load any non-constant parts. */
4392 spu_expand_vector_init (rtx target, rtx vals)
4394 enum machine_mode mode = GET_MODE (target);
4395 int n_elts = GET_MODE_NUNITS (mode);
4397 bool all_same = true;
4398 rtx first, x, first_constant = NULL_RTX;
4401 first = XVECEXP (vals, 0, 0);
4402 for (i = 0; i < n_elts; ++i)
4404 x = XVECEXP (vals, 0, i);
4405 if (!CONSTANT_P (x))
4409 if (first_constant == NULL_RTX)
4412 if (i > 0 && !rtx_equal_p (x, first))
4416 /* if all elements are the same, use splats to repeat elements */
4419 if (!CONSTANT_P (first)
4420 && !register_operand (first, GET_MODE (x)))
4421 first = force_reg (GET_MODE (first), first);
4422 emit_insn (gen_spu_splats (target, first));
4426 /* load constant parts */
4427 if (n_var != n_elts)
4431 emit_move_insn (target,
4432 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4436 rtx constant_parts_rtx = copy_rtx (vals);
4438 gcc_assert (first_constant != NULL_RTX);
4439 /* fill empty slots with the first constant, this increases
4440 our chance of using splats in the recursive call below. */
4441 for (i = 0; i < n_elts; ++i)
4442 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4443 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4445 spu_expand_vector_init (target, constant_parts_rtx);
4449 /* load variable parts */
4452 rtx insert_operands[4];
4454 insert_operands[0] = target;
4455 insert_operands[2] = target;
4456 for (i = 0; i < n_elts; ++i)
4458 x = XVECEXP (vals, 0, i);
4459 if (!CONSTANT_P (x))
4461 if (!register_operand (x, GET_MODE (x)))
4462 x = force_reg (GET_MODE (x), x);
4463 insert_operands[1] = x;
4464 insert_operands[3] = GEN_INT (i);
4465 spu_builtin_insert (insert_operands);