/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tm-constrs.h"
#include "spu-builtins.h"
/* Builtin types, data and prototypes. */
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
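/* Illustrative sketch (not part of the original file): how a table of
   {low, high} pairs like the one above can be used to validate a
   builtin's immediate operand.  The helper name is hypothetical; spu.c
   performs an equivalent range check when expanding builtins.  */
static int
spu_immediate_in_range_sketch (HOST_WIDE_INT val, int range_index)
{
  /* The operand fits if it lies within the inclusive [low, high]
     range recorded for its operand type.  */
  return val >= spu_builtin_range[range_index].low
    && val <= spu_builtin_range[range_index].high;
}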
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs. */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance, sbitmap blocks);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
				    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static void spu_sched_init_global (FILE *, int, int);
static void spu_sched_init (FILE *, int, int);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
					    const_tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
				      gimple_seq * pre_p, gimple_seq * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total, bool speed);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);

extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
/* Which instruction set architecture to use.  */
int spu_arch;

/* Which cpu are we tuning for.  */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);

/* Determines whether we run variable tracking in machine dependent
   reorg.  */
static int spu_flag_var_tracking;
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

static enum machine_mode spu_unwind_word_mode (void);

static enum machine_mode
spu_libgcc_cmp_return_mode (void);

static enum machine_mode
spu_libgcc_shift_count_mode (void);

/* Built in types.  */
tree spu_builtin_types[SPU_BTI_MAX];
/*  TARGET overrides.  */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START asm_file_start

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

struct gcc_target targetm = TARGET_INITIALIZER;
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params.  */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default. */
  flag_rename_registers = 1;
}
/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed.  */
void
spu_override_options (void)
{
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue.  */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_arch_string[0]);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_tune_string[0]);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* Table of machine attributes.  */
const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,             0, 0, false, false, false, NULL }
};
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, ensure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  if (start > 0)
    {
      s = gen_reg_rtx (src_mode);
      switch (src_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
	  break;
	default:
	  abort ();
	}
      src = s;
    }

  if (width < src_size)
    {
      rtx pat;
      int icode;
      switch (src_mode)
	{
	case SImode:
	  icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
	  break;
	case DImode:
	  icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
	  break;
	case TImode:
	  icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
	  break;
	default:
	  abort ();
	}
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx mem, mem1;

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);

	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = adjust_address (mem, TImode, 16);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
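/* Illustrative sketch (not part of the original file): the TImode mask
   construction used in spu_expand_insv above, as a standalone helper.
   For a WIDTH-bit field starting at bit START of a 128-bit value it
   builds a 16-byte mask with exactly the field bits set, byte by byte:
   a partial leading byte, full interior bytes, then the trailing byte
   is trimmed.  */
static void
build_insert_mask_sketch (unsigned char arr[16], int start, int width)
{
  int i = start / 8;
  int j;
  for (j = 0; j < 16; j++)
    arr[j] = 0;
  arr[i] = 0xff >> (start & 7);	/* bits of the first (partial) byte */
  for (i++; i <= (start + width - 1) / 8; i++)
    arr[i] = 0xff;		/* whole bytes inside the field */
  arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));	/* trim tail */
}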
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;

  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (offset < bytes)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
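/* Illustrative sketch (not part of the original file): a scalar model
   of the selb-based tail copy above.  The mask holds 0xff for the
   first (bytes - offset) bytes, so selb takes those bytes from the
   source quadword and leaves the rest of the destination untouched.  */
static void
selb_tail_copy_sketch (unsigned char *dst, const unsigned char *src,
		       int remainder)
{
  unsigned char mask[16] = { 0 };
  int i;
  for (i = 0; i < remainder; i++)
    mask[i] = 0xff;
  /* selb semantics, byte by byte: a mask byte selects src over dst.  */
  for (i = 0; i < 16; i++)
    dst[i] = (dst[i] & ~mask[i]) | (src[i] & mask[i]);
}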
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, so we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  int index;
  int eq_test = 0;
  /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1.  */
  if (GET_CODE (spu_compare_op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
	switch (code)
	  {
	  case GE:  spu_compare_op1 = GEN_INT (val); code = GT;  break;
	  case LT:  spu_compare_op1 = GEN_INT (val); code = LE;  break;
	  case GEU: spu_compare_op1 = GEN_INT (val); code = GTU; break;
	  case LTU: spu_compare_op1 = GEN_INT (val); code = LEU; break;
	  default:  break;
	  }
    }
  op_mode = GET_MODE (spu_compare_op0);

  if (HONOR_NANS (op_mode))

  if (HONOR_NANS (op_mode))

      comp_mode = V4SImode;

      comp_mode = V2DImode;

  if (GET_MODE (spu_compare_op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
	  || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
	 comparing against zero and branching.  */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = spu_compare_op1;
	  spu_compare_op1 = spu_compare_op0;
	  spu_compare_op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (spu_compare_op0, op_mode))
	spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (spu_compare_op1, op_mode))
	spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 spu_compare_op0,
							 spu_compare_op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     spu_compare_op0,
							     spu_compare_op1);
	  if (eq_rtx == 0)
	    abort ();
	  emit_insn (eq_rtx);
	  ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
	 QI compare result to a HI result.  */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
	 mask based on that result.  */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
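/* Illustrative sketch (not part of the original file): the arithmetic
   behind the (X >= C) to (X > C-1) rewrite near the top of
   spu_emit_branch_or_set above.  For integers, x >= c holds exactly
   when x > c-1, and x < c exactly when x <= c-1, provided c-1 does not
   wrap in the operand's mode; the trunc_int_for_mode check guards
   against that wrap (e.g. C being the mode's minimum value).  */
static int
compare_canonicalization_sketch (int x, int c)
{
  /* Only valid when c-1 does not overflow.  */
  return (x >= c) == (x > c - 1);	/* always 1 for such c */
}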
HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
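/* Illustrative sketch (not part of the original file): the word
   packing used by the two converters above.  A DFmode image arrives as
   two 32-bit target words, high word first; packing shifts the high
   word up and masks the low word in, and the (v << 32) >> 32 above
   recovers the sign-extended low word again.  */
static long long
pack_df_words_sketch (long hi_word, long lo_word)
{
  long long val = hi_word;			/* val = l[0];      */
  val = (val << 32) | (lo_word & 0xffffffffll);	/* low bits of l[1] */
  return val;
}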
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;

  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
		break;
	      case SPU_ILA:
		fprintf (file, "a");
		break;
	      case SPU_ILH:
		fprintf (file, "h");
		break;
	      case SPU_ILHU:
		fprintf (file, "hu");
		break;
	      default:
		gcc_unreachable ();
	      }
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	    if (info == 1)
	      fprintf (file, "b");
	    else if (info == 2)
	      fprintf (file, "h");
	    else if (info == 4)
	      fprintf (file, "w");
	    else if (info == 8)
	      fprintf (file, "d");
	    break;
	  case IC_IL1s:
	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");
	    break;
	  default:
	    break;
	  }
      }
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
	      case SPU_ILA:
		break;
	      case SPU_ILH:
	      case SPU_ILHU:
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		break;
	      default:
		gcc_unreachable ();
	      }
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	    break;
	  case IC_FSMBI:
	    constant_to_array (mode, x, arr);
	    val = 0;
	    for (i = 0; i < 16; i++)
	      {
		val <<= 1;
		val |= arr[i] & 1;
	      }
	    print_operand (file, GEN_INT (val), 0);
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	    break;
	  case IC_IL1s:
	    if (xcode == HIGH)
	      x = XEXP (x, 0);
	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	    if (xcode == HIGH)
	      fprintf (file, "@h");
	    break;
	  default:
	    break;
	  }
      }
      return;
      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generating
	     control word instructions.  */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	}
      return;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arrhi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
	else
	  to = ops[0];
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      crtl->uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    default:
      break;
    }
  return 0;
}
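/* Illustrative sketches (not part of the original file) of the two
   constant splits above, on plain scalars and byte arrays.  IC_IL2
   rebuilds each 32-bit word from its high halfword (loaded by ilhu)
   ORed with its low halfword (ORed in by iohl).  IC_FSMBI2 rewrites a
   pattern whose bytes are either 0x00 or one repeated value V as an
   fsmbi byte mask ANDed with a splat of V.  */
static void
il2_split_sketch (unsigned int word, unsigned int *hi, unsigned int *lo)
{
  *hi = word & 0xffff0000;	/* the ilhu part */
  *lo = word & 0x0000ffff;	/* the iohl part; (*hi | *lo) == word */
}

static void
fsmbi2_split_sketch (const unsigned char arr[16],
		     unsigned char arr_fsmbi[16],
		     unsigned char arr_andbi[16])
{
  int i;
  unsigned char v = 0;
  for (i = 0; i < 16; i++)
    {
      if (arr[i] != 0)
	v = arr[i];			/* the single nonzero byte value */
      arr_fsmbi[i] = arr[i] ? 0xff : 0x00;
    }
  for (i = 0; i < 16; i++)
    arr_andbi[i] = v;			/* splat of V for the AND */
  /* For each byte: arr[i] == (arr_fsmbi[i] & arr_andbi[i]).  */
}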
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || crtl->uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation.  */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}

static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it.  */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}

/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/* The stack frame looks like this:

   AP -> +-------------+
 prev SP | back chain  |
         +-------------+
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    vars     | get_frame_size() bytes
  HFP -> +-------------+
         |    args     | crtl->outgoing_args_size bytes
   SP -> +-------------+
*/
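/* Illustrative sketch (not part of the original file): how the regions
   in the diagram above add up to the total frame size that
   spu_expand_prologue below allocates.  STACK_POINTER_OFFSET covers
   the fixed slots at the bottom of the frame; the needs_frame flag
   stands for the non-leaf/alloca test in the real code.  */
static HOST_WIDE_INT
spu_total_frame_size_sketch (HOST_WIDE_INT vars, HOST_WIDE_INT saved_regs,
			     HOST_WIDE_INT outgoing_args,
			     HOST_WIDE_INT pretend_args, int needs_frame)
{
  HOST_WIDE_INT total = vars + saved_regs + outgoing_args + pretend_args;
  if (needs_frame)
    total += STACK_POINTER_OFFSET;
  return total;
}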
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    crtl->uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 0x10;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && crtl->uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1) */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
	 the value of the previous $sp because we save it as the back
	 chain.  */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else if (satisfies_constraint_K (GEN_INT (-total_size)))
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      REG_NOTES (insn) =
	gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + crtl->outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  REG_NOTES (insn) =
	    gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       real, REG_NOTES (insn));
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
}
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block. */
};
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
  (GET_CODE(INSN) == CALL_INSN \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.  */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   insns it does not recognize.  */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop.  */
static void
pad_bb (void)
{
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES.  */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops.  */
  length = 0;

  prev_insn = 0;
  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
	  || INSN_CODE (insn) == CODE_FOR_hbr)
	{
	  if (hbr_insn)
	    {
	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
		  || (a1 - a0 == 4))
		{
		  prev_insn = emit_insn_before (gen_lnop (), insn);
		  PUT_MODE (prev_insn, GET_MODE (insn));
		  PUT_MODE (insn, TImode);
		  length += 4;
		}
	    }
	  hbr_insn = insn;
	}
      if (INSN_CODE (insn) == CODE_FOR_blockage)
	{
	  if (GET_MODE (insn) == TImode)
	    PUT_MODE (next_insn, TImode);
	  insn = next_insn;
	  next_insn = next_active_insn (insn);
	}
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
	{
	  if (((addr + length) & 7) != 0)
	    {
	      emit_nop_for_insn (prev_insn);
	      length += 4;
	    }
	}
      else if (GET_MODE (insn) == TImode
	       && ((next_insn && GET_MODE (next_insn) != TImode)
		   || get_attr_type (insn) == TYPE_MULTI0)
	       && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
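/* Illustrative sketch (not part of the original file): the alignment
   test pad_bb applies above.  An insn can dual-issue with its
   successor only when it starts an even instruction pair, i.e. sits on
   an 8-byte boundary; ADDR is the insn address and LENGTH the bytes
   already added by earlier nops.  */
static int
needs_alignment_nop_sketch (int addr, int length)
{
  return ((addr + length) & 7) != 0;	/* not on an 8-byte boundary */
}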
/* Routines for branch hints. */

static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
		      int distance, sbitmap blocks)
{
  rtx branch_label = 0;
  rtx hint;
  rtx insn;
  rtx table;

  if (before == 0 || branch == 0 || target == 0)
    return;

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too.  */
  if (distance > 600)
    return;

  /* If we have a Basic block note, emit it after the basic block note.  */
  if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
    {
      rtvec vec;
      int j;
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
	vec = XVEC (PATTERN (table), 0);
      else
	vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
    }

  if (distance >= 588)
    {
      /* Make sure the hint isn't scheduled any earlier than this point,
	 which could make it too far for the branch offset to fit.  */
      recog_memoized (emit_insn_before (gen_blockage (), hint));
    }
  else if (distance <= 8 * 4)
    {
      /* To guarantee at least 8 insns between the hint and branch we
	 insert nops. */
      int d;
      for (d = distance; d < 8 * 4; d += 4)
	{
	  insn =
	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
	  recog_memoized (insn);
	}

      /* Make sure any nops inserted aren't scheduled before the hint. */
      recog_memoized (emit_insn_after (gen_blockage (), hint));

      /* Make sure any nops inserted aren't scheduled after the call. */
      if (CALL_P (branch) && distance < 8 * 4)
	recog_memoized (emit_insn_before (gen_blockage (), branch));
    }
}
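/* Illustrative sketch (not part of the original file): how many nops
   the loop above inserts to guarantee 8 insns (32 bytes) between the
   hint and the branch when they sit closer than that.  */
static int
hint_nops_needed_sketch (int distance)
{
  int n = 0, d;
  for (d = distance; d < 8 * 4; d += 4)
    n++;			/* one 4-byte nop per iteration */
  return n;			/* (32 - distance) / 4 for aligned input */
}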
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* jump table */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
		 try a branch hint.  */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
	 the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an even address and return
   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.
 */
static void
insert_hbrp_for_ilb_runout (rtx first)
{
  rtx insn, before_4 = 0, before_16 = 0;
  int addr = 0, length, first_addr = -1;
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
  int insert_lnop_after = 0;
  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	if (first_addr == -1)
	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
	length = get_attr_length (insn);

	if (before_4 == 0 && addr + length >= 4 * 4)
	  before_4 = insn;
	/* We test for 14 instructions because the first hbrp will add
	   up to 2 instructions. */
	if (before_16 == 0 && addr + length >= 14 * 4)
	  before_16 = insn;

	if (INSN_CODE (insn) == CODE_FOR_hbr)
	  {
	    /* Make sure an hbrp is at least 2 cycles away from a hint.
	       Insert an lnop after the hbrp when necessary. */
	    if (before_4 == 0 && addr > 0)
	      {
		before_4 = insn;
		insert_lnop_after |= 1;
	      }
	    else if (before_4 && addr <= 4 * 4)
	      insert_lnop_after |= 1;
	    if (before_16 == 0 && addr > 10 * 4)
	      {
		before_16 = insn;
		insert_lnop_after |= 2;
	      }
	    else if (before_16 && addr <= 14 * 4)
	      insert_lnop_after |= 2;
	  }

	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
	  {
	    if (addr < hbrp_addr0)
	      hbrp_addr0 = addr;
	    else if (addr < hbrp_addr1)
	      hbrp_addr1 = addr;
	  }

	if (CALL_P (insn) || JUMP_P (insn))
	  {
	    if (HINTED_P (insn))
	      return;

	    /* Any branch after the first 15 insns should be on an even
	       address to avoid a special case branch.  There might be
	       some nops and/or hbrps inserted, so we test after 10
	       insns. */
	    if (addr > 10 * 4)
	      SCHED_ON_EVEN_P (insn) = 1;
	  }

	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
	  return;

	if (addr + length >= 32 * 4)
	  {
	    gcc_assert (before_4 && before_16);
	    if (hbrp_addr0 > 4 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
		recog_memoized (insn);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_4)));
		PUT_MODE (insn, GET_MODE (before_4));
		PUT_MODE (before_4, TImode);
		if (insert_lnop_after & 1)
		  {
		    insn = emit_insn_before (gen_lnop (), before_4);
		    recog_memoized (insn);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID (before_4)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
		&& hbrp_addr1 > 16 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
		recog_memoized (insn);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_16)));
		PUT_MODE (insn, GET_MODE (before_16));
		PUT_MODE (before_16, TImode);
		if (insert_lnop_after & 2)
		  {
		    insn = emit_insn_before (gen_lnop (), before_16);
		    recog_memoized (insn);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID
							(before_16)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    return;
	  }
      }
    else if (BARRIER_P (insn))
      return;
}
2471 /* The SPU might hang when it executes 48 inline instructions after a
2472 hinted branch jumps to its hinted target. The beginning of a
2473 function and the return from a call might have been hinted, and must
2474 be handled as well. To prevent a hang we insert 2 hbrps. The first
2475 should be within 6 insns of the branch target. The second should be
2476 within 22 insns of the branch target. When determining if hbrps are
2477 necessary, we look for only 32 inline instructions, because up to
2478 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2479 new hbrps, we insert them within 4 and 16 insns of the target. */
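/* A rough sketch of the layout insert_hbrp_for_ilb_runout produces
   (added illustration, insn counts assumed):

     target:               ; hinted label or call return point
       insn
       hbrp                ; first prefetch, within 4 insns of target
       insn ...
       hbrp                ; second prefetch, within 16 insns of target
       insn ...

   guaranteeing an instruction prefetch occurs before 48 sequential
   instructions have executed.  */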
2484 if (TARGET_SAFE_HINTS)
2486 shorten_branches (get_insns ());
2487 /* Insert hbrp at beginning of function */
2488 insn = next_active_insn (get_insns ());
2490 insert_hbrp_for_ilb_runout (insn);
2491 /* Insert hbrp after hinted targets. */
2492 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2493 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2494 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2498 static int in_spu_reorg;
2500 /* Insert branch hints. There are no branch optimizations after this
2501 pass, so it's safe to set our branch hints now. */
2503 spu_machine_dependent_reorg (void)
2508 rtx branch_target = 0;
2509 int branch_addr = 0, insn_addr, required_dist = 0;
2513 if (!TARGET_BRANCH_HINTS || optimize == 0)
2515 /* We still do it for unoptimized code because an external
2516 function might have hinted a call or return. */
2522 blocks = sbitmap_alloc (last_basic_block);
2523 sbitmap_zero (blocks);
2526 compute_bb_for_insn ();
2530 spu_bb_info =
2531 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2532 sizeof (struct spu_bb_info));
2534 /* We need exact insn addresses and lengths. */
2535 shorten_branches (get_insns ());
2537 for (i = n_basic_blocks - 1; i >= 0; i--)
2539 bb = BASIC_BLOCK (i);
2541 if (spu_bb_info[i].prop_jump)
2543 branch = spu_bb_info[i].prop_jump;
2544 branch_target = get_branch_target (branch);
2545 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2546 required_dist = spu_hint_dist;
2548 /* Search from end of a block to beginning. In this loop, find
2549 jumps which need a branch hint and emit the hints only when:
2550 - it's an indirect branch and we're at the insn which sets
2552 - we're at an insn that will invalidate the hint. e.g., a
2553 call, another hint insn, inline asm that clobbers $hbr, and
2554 some inlined operations (divmodsi4). Don't consider jumps
2555 because they are only at the end of a block and are
2556 considered when we are deciding whether to propagate
2557 - we're getting too far away from the branch. The hbr insns
2558 only have a signed 10 bit offset
2559 We go back as far as possible so the branch will be considered
2560 for propagation when we get to the beginning of the block. */
2561 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2565 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2567 && ((GET_CODE (branch_target) == REG
2568 && set_of (branch_target, insn) != NULL_RTX)
2569 || insn_clobbers_hbr (insn)
2570 || branch_addr - insn_addr > 600))
2572 rtx next = NEXT_INSN (insn);
2573 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2574 if (insn != BB_END (bb)
2575 && branch_addr - next_addr >= required_dist)
2579 "hint for %i in block %i before %i\n",
2580 INSN_UID (branch), bb->index,
2582 spu_emit_branch_hint (next, branch, branch_target,
2583 branch_addr - next_addr, blocks);
2588 /* JUMP_P will only be true at the end of a block. When
2589 branch is already set it means we've previously decided
2590 to propagate a hint for that branch into this block. */
2591 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2594 if ((branch_target = get_branch_target (insn)))
2597 branch_addr = insn_addr;
2598 required_dist = spu_hint_dist;
2602 if (insn == BB_HEAD (bb))
2608 /* If we haven't emitted a hint for this branch yet, it might
2609 be profitable to emit it in one of the predecessor blocks,
2610 especially for loops. */
2612 basic_block prev = 0, prop = 0, prev2 = 0;
2613 int loop_exit = 0, simple_loop = 0;
2614 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2616 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2617 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2618 prev = EDGE_PRED (bb, j)->src;
2620 prev2 = EDGE_PRED (bb, j)->src;
2622 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2623 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2625 else if (EDGE_SUCC (bb, j)->dest == bb)
2628 /* If this branch is a loop exit then propagate to previous
2629 fallthru block. This catches the cases when it is a simple
2630 loop or when there is an initial branch into the loop. */
2631 if (prev && (loop_exit || simple_loop)
2632 && prev->loop_depth <= bb->loop_depth)
2635 /* If there is only one adjacent predecessor, don't propagate
2636 outside this loop. This loop_depth test isn't perfect, but
2637 I'm not sure the loop_father member is valid at this point. */
2638 else if (prev && single_pred_p (bb)
2639 && prev->loop_depth == bb->loop_depth)
2642 /* If this is the JOIN block of a simple IF-THEN then
2643 propagate the hint to the HEADER block. */
2644 else if (prev && prev2
2645 && EDGE_COUNT (bb->preds) == 2
2646 && EDGE_COUNT (prev->preds) == 1
2647 && EDGE_PRED (prev, 0)->src == prev2
2648 && prev2->loop_depth == bb->loop_depth
2649 && GET_CODE (branch_target) != REG)
2652 /* Don't propagate when:
2653 - this is a simple loop and the hint would be too far
2654 - this is not a simple loop and there are 16 insns in
2656 - the predecessor block ends in a branch that will be
2658 - the predecessor block ends in an insn that invalidates
2662 && (bbend = BB_END (prop))
2663 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2664 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2665 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2668 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2669 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2670 bb->index, prop->index, bb->loop_depth,
2671 INSN_UID (branch), loop_exit, simple_loop,
2672 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2674 spu_bb_info[prop->index].prop_jump = branch;
2675 spu_bb_info[prop->index].bb_index = i;
2677 else if (branch_addr - next_addr >= required_dist)
2680 fprintf (dump_file, "hint for %i in block %i before %i\n",
2681 INSN_UID (branch), bb->index,
2682 INSN_UID (NEXT_INSN (insn)));
2683 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2684 branch_addr - next_addr, blocks);
2691 if (!sbitmap_empty_p (blocks))
2692 find_many_sub_basic_blocks (blocks);
2694 /* We have to schedule to make sure alignment is ok. */
2695 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2697 /* The hints need to be scheduled, so call it again. */
2705 if (spu_flag_var_tracking)
2708 timevar_push (TV_VAR_TRACKING);
2709 variable_tracking_main ();
2710 timevar_pop (TV_VAR_TRACKING);
2711 df_finish_pass (false);
2714 free_bb_for_insn ();
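/* Example of the dump output produced above (added illustration with
   hypothetical insn and block numbers):

     propagate from 7 to 6 (loop depth 1) for 42 (loop_exit 1 simple_loop 1 dist 48)
     hint for 42 in block 6 before 35

   i.e., the hint for jump_insn 42 was propagated to predecessor block
   6 and emitted before insn 35, 48 bytes ahead of the branch.  */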
2720 /* Insn scheduling routines, primarily for dual issue. */
2722 spu_sched_issue_rate (void)
2728 uses_ls_unit (rtx insn)
2730 rtx set = single_set (insn);
2732 && (GET_CODE (SET_DEST (set)) == MEM
2733 || GET_CODE (SET_SRC (set)) == MEM))
2742 /* Handle inline asm */
2743 if (INSN_CODE (insn) == -1)
2745 t = get_attr_type (insn);
2770 case TYPE_IPREFETCH:
2778 /* haifa-sched.c has a static variable that keeps track of the current
2779 cycle. It is passed to spu_sched_reorder, and we record it here for
2780 use by spu_sched_variable_issue. It won't be accurate if the
2781 scheduler updates its clock_var between the two calls. */
2782 static int clock_var;
2784 /* This is used to keep track of insn alignment. Set to 0 at the
2785 beginning of each block and increased by the "length" attr of each
2786 insn scheduled. */
2787 static int spu_sched_length;
2789 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2790 ready list appropriately in spu_sched_reorder(). */
2791 static int pipe0_clock;
2792 static int pipe1_clock;
2794 static int prev_clock_var;
2796 static int prev_priority;
2798 /* The SPU needs to load the next ilb sometime during the execution of
2799 the previous ilb. There is a potential conflict if every cycle has a
2800 load or store. To avoid the conflict we make sure the load/store
2801 unit is free for at least one cycle during the execution of insns in
2802 the previous ilb. */
2803 static int spu_ls_first;
2804 static int prev_ls_clock;
2807 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2808 int max_ready ATTRIBUTE_UNUSED)
2810 spu_sched_length = 0;
2814 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2815 int max_ready ATTRIBUTE_UNUSED)
2817 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2819 /* When any block might be at least 8-byte aligned, assume they
2820 will all be at least 8-byte aligned to make sure dual issue
2821 works out correctly. */
2822 spu_sched_length = 0;
2824 spu_ls_first = INT_MAX;
2829 prev_clock_var = -1;
2834 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2835 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2839 if (GET_CODE (PATTERN (insn)) == USE
2840 || GET_CODE (PATTERN (insn)) == CLOBBER
2841 || (len = get_attr_length (insn)) == 0)
2844 spu_sched_length += len;
2846 /* Reset on inline asm */
2847 if (INSN_CODE (insn) == -1)
2849 spu_ls_first = INT_MAX;
2854 p = get_pipe (insn);
2856 pipe0_clock = clock_var;
2858 pipe1_clock = clock_var;
2862 if (clock_var - prev_ls_clock > 1
2863 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2864 spu_ls_first = INT_MAX;
2865 if (uses_ls_unit (insn))
2867 if (spu_ls_first == INT_MAX)
2868 spu_ls_first = spu_sched_length;
2869 prev_ls_clock = clock_var;
2872 /* The scheduler hasn't inserted the nop, but we will later on.
2873 Include those nops in spu_sched_length. */
2874 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2875 spu_sched_length += 4;
2876 prev_clock_var = clock_var;
2878 /* more is -1 when called from spu_sched_reorder for new insns
2879 that don't have INSN_PRIORITY. */
2881 prev_priority = INSN_PRIORITY (insn);
2884 /* Always try issuing more insns. spu_sched_reorder will decide
2885 when the cycle should be advanced. */
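/* Added illustration (addresses assumed): dual issue happens when a
   pipe0 insn sits at an 8-byte boundary and is immediately followed
   by a pipe1 insn, e.g.

     0x120: a      $3,$4,$5     ; pipe0 (even pipeline)
     0x124: lqd    $6,0($sp)    ; pipe1 (odd pipeline), same cycle

   which is why spu_sched_length tracks the low bits of the current
   address.  */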
2889 /* This function is called for both TARGET_SCHED_REORDER and
2890 TARGET_SCHED_REORDER2. */
2892 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2893 rtx *ready, int *nreadyp, int clock)
2895 int i, nready = *nreadyp;
2896 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2901 if (nready <= 0 || pipe1_clock >= clock)
2904 /* Find any rtl insns that don't generate assembly insns and schedule
2905 them first. */
2906 for (i = nready - 1; i >= 0; i--)
2909 if (INSN_CODE (insn) == -1
2910 || INSN_CODE (insn) == CODE_FOR_blockage
2911 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2913 ready[i] = ready[nready - 1];
2914 ready[nready - 1] = insn;
2919 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2920 for (i = 0; i < nready; i++)
2921 if (INSN_CODE (ready[i]) != -1)
2924 switch (get_attr_type (insn))
2949 case TYPE_IPREFETCH:
2955 /* In the first scheduling phase, schedule loads and stores together
2956 to increase the chance they will get merged during postreload CSE. */
2957 if (!reload_completed && pipe_ls >= 0)
2959 insn = ready[pipe_ls];
2960 ready[pipe_ls] = ready[nready - 1];
2961 ready[nready - 1] = insn;
2965 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2969 /* When we have loads/stores in every cycle of the last 15 insns and
2970 we are about to schedule another load/store, emit an hbrp insn
2971 instead. */
2972 if (in_spu_reorg
2973 && spu_sched_length - spu_ls_first >= 4 * 15
2974 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2976 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2977 recog_memoized (insn);
2978 if (pipe0_clock < clock)
2979 PUT_MODE (insn, TImode);
2980 spu_sched_variable_issue (file, verbose, insn, -1);
2984 /* In general, we want to emit nops to increase dual issue, but dual
2985 issue isn't faster when one of the insns could be scheduled later
2986 without affecting the critical path. We look at INSN_PRIORITY to
2987 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2988 used to adjust it. */
2989 if (in_spu_reorg && spu_dual_nops < 10)
2991 /* When we are at an even address and we are not issuing nops to
2992 improve scheduling then we need to advance the cycle. */
2993 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2994 && (spu_dual_nops == 0
2997 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3000 /* When at an odd address, schedule the highest priority insn
3001 without considering pipeline. */
3002 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3003 && (spu_dual_nops == 0
3005 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3010 /* If we haven't issued a pipe0 insn yet this cycle and there is a
3011 pipe0 insn in the ready list, schedule it. */
3012 if (pipe0_clock < clock && pipe_0 >= 0)
3013 schedule_i = pipe_0;
3015 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3016 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3018 schedule_i = pipe_1;
3020 if (schedule_i > -1)
3022 insn = ready[schedule_i];
3023 ready[schedule_i] = ready[nready - 1];
3024 ready[nready - 1] = insn;
3030 /* INSN is dependent on DEP_INSN. */
3032 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3036 /* The blockage pattern is used to prevent instructions from being
3037 moved across it and has no cost. */
3038 if (INSN_CODE (insn) == CODE_FOR_blockage
3039 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3042 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3043 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3046 /* Make sure hbrps are spread out. */
3047 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3048 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3051 /* Make sure hints and hbrps are 2 cycles apart. */
3052 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3053 || INSN_CODE (insn) == CODE_FOR_hbr)
3054 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3055 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3058 /* An hbrp has no real dependency on other insns. */
3059 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3060 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3063 /* Assuming that it is unlikely an argument register will be used in
3064 the first cycle of the called function, we reduce the cost for
3065 slightly better scheduling of dep_insn. When not hinted, the
3066 mispredicted branch would hide the cost as well. */
3069 rtx target = get_branch_target (insn);
3070 if (GET_CODE (target) != REG || !set_of (target, insn))
3075 /* And when returning from a function, let's assume the return values
3076 are completed sooner too. */
3077 if (CALL_P (dep_insn))
3080 /* Make sure an instruction that loads from the back chain is scheduled
3081 away from the return instruction so a hint is more likely to get
3082 issued. */
3083 if (INSN_CODE (insn) == CODE_FOR__return
3084 && (set = single_set (dep_insn))
3085 && GET_CODE (SET_DEST (set)) == REG
3086 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3089 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3090 scheduler makes every insn in a block anti-dependent on the final
3091 jump_insn. We adjust here so higher cost insns will get scheduled
3092 earlier. */
3093 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3094 return insn_cost (dep_insn) - 3;
3099 /* Create a CONST_DOUBLE from a string. */
3101 spu_float_const (const char *string, enum machine_mode mode)
3103 REAL_VALUE_TYPE value;
3104 value = REAL_VALUE_ATOF (string, mode);
3105 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
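/* For example (added note), spu_float_const ("1.0", SFmode) returns
   the CONST_DOUBLE rtx for single-precision 1.0, whose 32-bit image
   is 0x3f800000.  */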
3109 spu_constant_address_p (rtx x)
3111 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3112 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3113 || GET_CODE (x) == HIGH);
3116 static enum spu_immediate
3117 which_immediate_load (HOST_WIDE_INT val)
3119 gcc_assert (val == trunc_int_for_mode (val, SImode));
3121 if (val >= -0x8000 && val <= 0x7fff)
3123 if (val >= 0 && val <= 0x3ffff)
3125 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3127 if ((val & 0xffff) == 0)
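/* Illustrative cases (added commentary): 0x1234 -> SPU_IL (il),
   0x31234 -> SPU_ILA (ila), 0x12341234 -> SPU_ILH (ilh),
   0x12340000 -> SPU_ILHU (ilhu); 0x12345678 matches none of these
   and needs an ilhu/iohl pair.  */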
3133 /* Return true when OP can be loaded by one of the il instructions, or
3134 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3136 immediate_load_p (rtx op, enum machine_mode mode)
3138 if (CONSTANT_P (op))
3140 enum immediate_class c = classify_immediate (op, mode);
3141 return c == IC_IL1 || c == IC_IL1s
3142 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3147 /* Return true if the first SIZE bytes of arr form a constant that can be
3148 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3149 represent the size and offset of the instruction to use. */
3151 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
3153 int cpat, run, i, start;
3157 for (i = 0; i < size && cpat; i++)
3165 else if (arr[i] == 2 && arr[i+1] == 3)
3167 else if (arr[i] == 0)
3169 while (arr[i+run] == run && i+run < 16)
3171 if (run != 4 && run != 8)
3176 if ((i & (run-1)) != 0)
3183 if (cpat && (run || size < 16))
3190 *pstart = start == -1 ? 16-run : start;
3196 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3197 it into a register. MODE is only valid when OP is a CONST_INT. */
3198 static enum immediate_class
3199 classify_immediate (rtx op, enum machine_mode mode)
3202 unsigned char arr[16];
3203 int i, j, repeated, fsmbi, repeat;
3205 gcc_assert (CONSTANT_P (op));
3207 if (GET_MODE (op) != VOIDmode)
3208 mode = GET_MODE (op);
3210 /* A V4SI const_vector with all identical symbols is ok. */
3213 && GET_CODE (op) == CONST_VECTOR
3214 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3215 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3216 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3217 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3218 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3219 op = CONST_VECTOR_ELT (op, 0);
3221 switch (GET_CODE (op))
3225 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3228 /* We can never know if the resulting address fits in 18 bits and can be
3229 loaded with ila. For now, assume the address will not overflow if
3230 the displacement is "small" (fits 'K' constraint). */
3231 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3233 rtx sym = XEXP (XEXP (op, 0), 0);
3234 rtx cst = XEXP (XEXP (op, 0), 1);
3236 if (GET_CODE (sym) == SYMBOL_REF
3237 && GET_CODE (cst) == CONST_INT
3238 && satisfies_constraint_K (cst))
3247 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3248 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3249 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3255 constant_to_array (mode, op, arr);
3257 /* Check that each 4-byte slot is identical. */
3259 for (i = 4; i < 16; i += 4)
3260 for (j = 0; j < 4; j++)
3261 if (arr[j] != arr[i + j])
3266 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3267 val = trunc_int_for_mode (val, SImode);
3269 if (which_immediate_load (val) != SPU_NONE)
3273 /* Any mode of 2 bytes or smaller can be loaded with an il
3274 instruction. */
3275 gcc_assert (GET_MODE_SIZE (mode) > 2);
3279 for (i = 0; i < 16 && fsmbi; i++)
3280 if (arr[i] != 0 && repeat == 0)
3282 else if (arr[i] != 0 && arr[i] != repeat)
3285 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3287 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3300 static enum spu_immediate
3301 which_logical_immediate (HOST_WIDE_INT val)
3303 gcc_assert (val == trunc_int_for_mode (val, SImode));
3305 if (val >= -0x200 && val <= 0x1ff)
3307 if (val >= 0 && val <= 0xffff)
3309 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3311 val = trunc_int_for_mode (val, HImode);
3312 if (val >= -0x200 && val <= 0x1ff)
3314 if ((val & 0xff) == ((val >> 8) & 0xff))
3316 val = trunc_int_for_mode (val, QImode);
3317 if (val >= -0x200 && val <= 0x1ff)
3324 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3325 CONST_DOUBLEs. */
3327 const_vector_immediate_p (rtx x)
3330 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3331 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3332 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3333 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3339 logical_immediate_p (rtx op, enum machine_mode mode)
3342 unsigned char arr[16];
3345 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3346 || GET_CODE (op) == CONST_VECTOR);
3348 if (GET_CODE (op) == CONST_VECTOR
3349 && !const_vector_immediate_p (op))
3352 if (GET_MODE (op) != VOIDmode)
3353 mode = GET_MODE (op);
3355 constant_to_array (mode, op, arr);
3357 /* Check that bytes are repeated. */
3358 for (i = 4; i < 16; i += 4)
3359 for (j = 0; j < 4; j++)
3360 if (arr[j] != arr[i + j])
3363 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3364 val = trunc_int_for_mode (val, SImode);
3366 i = which_logical_immediate (val);
3367 return i != SPU_NONE && i != SPU_IOHL;
3371 iohl_immediate_p (rtx op, enum machine_mode mode)
3374 unsigned char arr[16];
3377 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3378 || GET_CODE (op) == CONST_VECTOR);
3380 if (GET_CODE (op) == CONST_VECTOR
3381 && !const_vector_immediate_p (op))
3384 if (GET_MODE (op) != VOIDmode)
3385 mode = GET_MODE (op);
3387 constant_to_array (mode, op, arr);
3389 /* Check that bytes are repeated. */
3390 for (i = 4; i < 16; i += 4)
3391 for (j = 0; j < 4; j++)
3392 if (arr[j] != arr[i + j])
3395 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3396 val = trunc_int_for_mode (val, SImode);
3398 return val >= 0 && val <= 0xffff;
3402 arith_immediate_p (rtx op, enum machine_mode mode,
3403 HOST_WIDE_INT low, HOST_WIDE_INT high)
3406 unsigned char arr[16];
3409 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3410 || GET_CODE (op) == CONST_VECTOR);
3412 if (GET_CODE (op) == CONST_VECTOR
3413 && !const_vector_immediate_p (op))
3416 if (GET_MODE (op) != VOIDmode)
3417 mode = GET_MODE (op);
3419 constant_to_array (mode, op, arr);
3421 if (VECTOR_MODE_P (mode))
3422 mode = GET_MODE_INNER (mode);
3424 bytes = GET_MODE_SIZE (mode);
3425 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3427 /* Check that bytes are repeated. */
3428 for (i = bytes; i < 16; i += bytes)
3429 for (j = 0; j < bytes; j++)
3430 if (arr[j] != arr[i + j])
3434 for (j = 1; j < bytes; j++)
3435 val = (val << 8) | arr[j];
3437 val = trunc_int_for_mode (val, mode);
3439 return val >= low && val <= high;
3442 /* We accept:
3443 - any 32-bit constant (SImode, SFmode)
3444 - any constant that can be generated with fsmbi (any mode)
3445 - a 64-bit constant where the high and low bits are identical
3446 (DImode, DFmode)
3447 - a 128-bit constant where the four 32-bit words match. */
3449 spu_legitimate_constant_p (rtx x)
3451 if (GET_CODE (x) == HIGH)
3453 /* V4SI with all identical symbols is valid. */
3455 && GET_MODE (x) == V4SImode
3456 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3457 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3458 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3459 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3460 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3461 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3463 if (GET_CODE (x) == CONST_VECTOR
3464 && !const_vector_immediate_p (x))
3469 /* Valid addresses are:
3470 - symbol_ref, label_ref, const
3471 - reg
3472 - reg + const, where either reg or const is 16 byte aligned
3473 - reg + reg, alignment doesn't matter
3474 The alignment matters in the reg+const case because lqd and stqd
3475 ignore the 4 least significant bits of the const. (TODO: It might be
3476 preferable to allow any alignment and fix it up when splitting.) */
3478 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3479 rtx x, int reg_ok_strict)
3481 if (mode == TImode && GET_CODE (x) == AND
3482 && GET_CODE (XEXP (x, 1)) == CONST_INT
3483 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3485 switch (GET_CODE (x))
3489 return !TARGET_LARGE_MEM;
3492 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3494 rtx sym = XEXP (XEXP (x, 0), 0);
3495 rtx cst = XEXP (XEXP (x, 0), 1);
3497 /* Accept any symbol_ref + constant, assuming it does not
3498 wrap around the local store addressability limit. */
3499 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3505 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3509 gcc_assert (GET_CODE (x) == REG);
3512 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3517 rtx op0 = XEXP (x, 0);
3518 rtx op1 = XEXP (x, 1);
3519 if (GET_CODE (op0) == SUBREG)
3520 op0 = XEXP (op0, 0);
3521 if (GET_CODE (op1) == SUBREG)
3522 op1 = XEXP (op1, 0);
3523 /* We can't just accept any aligned register because CSE can
3524 change it to a register that is not marked aligned and then
3525 recog will fail. So we only accept frame registers because
3526 they will only be changed to other frame registers. */
3527 if (GET_CODE (op0) == REG
3528 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3529 && GET_CODE (op1) == CONST_INT
3530 && INTVAL (op1) >= -0x2000
3531 && INTVAL (op1) <= 0x1fff
3532 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
3534 if (GET_CODE (op0) == REG
3535 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3536 && GET_CODE (op1) == REG
3537 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
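/* Added illustration (not in the original): the quadword actually
   accessed by lqd/stqd is at (reg + const) & -16, so with a base
   register that is only 4-byte aligned an offset of 4 could select
   the wrong quadword. With a 16-byte aligned base, spu_split_load
   and spu_split_store can fix up any offset with a rotate or an
   insertion mask, hence the regno_aligned_for_load test above.  */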
3548 /* When the address is reg + const_int, force the const_int into a
3549 register. */
3551 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3552 enum machine_mode mode)
3555 /* Make sure both operands are registers. */
3556 if (GET_CODE (x) == PLUS)
3560 if (ALIGNED_SYMBOL_REF_P (op0))
3562 op0 = force_reg (Pmode, op0);
3563 mark_reg_pointer (op0, 128);
3565 else if (GET_CODE (op0) != REG)
3566 op0 = force_reg (Pmode, op0);
3567 if (ALIGNED_SYMBOL_REF_P (op1))
3569 op1 = force_reg (Pmode, op1);
3570 mark_reg_pointer (op1, 128);
3572 else if (GET_CODE (op1) != REG)
3573 op1 = force_reg (Pmode, op1);
3574 x = gen_rtx_PLUS (Pmode, op0, op1);
3575 if (spu_legitimate_address (mode, x, 0))
3581 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3582 struct attribute_spec.handler. */
3584 spu_handle_fndecl_attribute (tree * node,
3586 tree args ATTRIBUTE_UNUSED,
3587 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3589 if (TREE_CODE (*node) != FUNCTION_DECL)
3591 warning (0, "`%s' attribute only applies to functions",
3592 IDENTIFIER_POINTER (name));
3593 *no_add_attrs = true;
3599 /* Handle the "vector" attribute. */
3601 spu_handle_vector_attribute (tree * node, tree name,
3602 tree args ATTRIBUTE_UNUSED,
3603 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3605 tree type = *node, result = NULL_TREE;
3606 enum machine_mode mode;
3609 while (POINTER_TYPE_P (type)
3610 || TREE_CODE (type) == FUNCTION_TYPE
3611 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3612 type = TREE_TYPE (type);
3614 mode = TYPE_MODE (type);
3616 unsigned_p = TYPE_UNSIGNED (type);
3620 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3623 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3626 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3629 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3632 result = V4SF_type_node;
3635 result = V2DF_type_node;
3641 /* Propagate qualifiers attached to the element type
3642 onto the vector type. */
3643 if (result && result != type && TYPE_QUALS (type))
3644 result = build_qualified_type (result, TYPE_QUALS (type));
3646 *no_add_attrs = true; /* No need to hang on to the attribute. */
3649 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3651 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3656 /* Return nonzero if FUNC is a naked function. */
3658 spu_naked_function_p (tree func)
3662 if (TREE_CODE (func) != FUNCTION_DECL)
3665 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3666 return a != NULL_TREE;
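/* For example (added note), a function declared as

     void handler (void) __attribute__ ((naked));

   is treated as having no compiler-generated prologue or epilogue.  */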
3670 spu_initial_elimination_offset (int from, int to)
3672 int saved_regs_size = spu_saved_regs_size ();
3674 if (!current_function_is_leaf || crtl->outgoing_args_size
3675 || get_frame_size () || saved_regs_size)
3676 sp_offset = STACK_POINTER_OFFSET;
3677 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3678 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3679 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3680 return get_frame_size ();
3681 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3682 return sp_offset + crtl->outgoing_args_size
3683 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3684 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3685 return get_frame_size () + saved_regs_size + sp_offset;
3691 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3693 enum machine_mode mode = TYPE_MODE (type);
3694 int byte_size = ((mode == BLKmode)
3695 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3697 /* Make sure small structs are left justified in a register. */
3698 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3699 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3701 enum machine_mode smode;
3704 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3705 int n = byte_size / UNITS_PER_WORD;
3706 v = rtvec_alloc (nregs);
3707 for (i = 0; i < n; i++)
3709 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3710 gen_rtx_REG (TImode,
3713 GEN_INT (UNITS_PER_WORD * i));
3714 byte_size -= UNITS_PER_WORD;
3722 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3724 gen_rtx_EXPR_LIST (VOIDmode,
3725 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3726 GEN_INT (UNITS_PER_WORD * n));
3728 return gen_rtx_PARALLEL (mode, v);
3730 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3734 spu_function_arg (CUMULATIVE_ARGS cum,
3735 enum machine_mode mode,
3736 tree type, int named ATTRIBUTE_UNUSED)
3740 if (cum >= MAX_REGISTER_ARGS)
3743 byte_size = ((mode == BLKmode)
3744 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3746 /* The ABI does not allow parameters to be passed partially in
3747 reg and partially in stack. */
3748 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3751 /* Make sure small structs are left justified in a register. */
3752 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3753 && byte_size < UNITS_PER_WORD && byte_size > 0)
3755 enum machine_mode smode;
3759 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3760 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3761 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3763 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3766 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3769 /* Variable sized types are passed by reference. */
3771 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3772 enum machine_mode mode ATTRIBUTE_UNUSED,
3773 const_tree type, bool named ATTRIBUTE_UNUSED)
3775 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3781 /* Create and return the va_list datatype.
3783 On SPU, va_list is an array type equivalent to
3785 typedef struct __va_list_tag
3787 void *__args __attribute__((__aligned(16)));
3788 void *__skip __attribute__((__aligned(16)));
3792 where __args points to the arg that will be returned by the next
3793 va_arg(), and __skip points to the previous stack frame such that
3794 when __args == __skip we should advance __args by 32 bytes. */
3796 spu_build_builtin_va_list (void)
3798 tree f_args, f_skip, record, type_decl;
3801 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3804 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3806 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3807 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3809 DECL_FIELD_CONTEXT (f_args) = record;
3810 DECL_ALIGN (f_args) = 128;
3811 DECL_USER_ALIGN (f_args) = 1;
3813 DECL_FIELD_CONTEXT (f_skip) = record;
3814 DECL_ALIGN (f_skip) = 128;
3815 DECL_USER_ALIGN (f_skip) = 1;
3817 TREE_CHAIN (record) = type_decl;
3818 TYPE_NAME (record) = type_decl;
3819 TYPE_FIELDS (record) = f_args;
3820 TREE_CHAIN (f_args) = f_skip;
3822 /* We know this is being padded and that is what we want. It is an internal
3823 type so hide the warnings from the user. */
3825 warn_padded = false;
3827 layout_type (record);
3831 /* The correct type is an array type of one element. */
3832 return build_array_type (record, build_index_type (size_zero_node));
3835 /* Implement va_start by filling the va_list structure VALIST.
3836 NEXTARG points to the first anonymous stack argument.
3838 The following global variables are used to initialize
3839 the va_list structure:
3842 the CUMULATIVE_ARGS for this function
3844 crtl->args.arg_offset_rtx:
3845 holds the offset of the first anonymous stack argument
3846 (relative to the virtual arg pointer). */
3849 spu_va_start (tree valist, rtx nextarg)
3851 tree f_args, f_skip;
3854 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3855 f_skip = TREE_CHAIN (f_args);
3857 valist = build_va_arg_indirect_ref (valist);
3859 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3861 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3863 /* Find the __args area. */
3864 t = make_tree (TREE_TYPE (args), nextarg);
3865 if (crtl->args.pretend_args_size > 0)
3866 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3867 size_int (-STACK_POINTER_OFFSET));
3868 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3869 TREE_SIDE_EFFECTS (t) = 1;
3870 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3872 /* Find the __skip area. */
3873 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3874 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3875 size_int (crtl->args.pretend_args_size
3876 - STACK_POINTER_OFFSET));
3877 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3878 TREE_SIDE_EFFECTS (t) = 1;
3879 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3882 /* Gimplify va_arg by updating the va_list structure
3883 VALIST as required to retrieve an argument of type
3884 TYPE, and returning that argument.
3886 ret = va_arg(VALIST, TYPE);
3888 generates code equivalent to:
3890 paddedsize = (sizeof(TYPE) + 15) & -16;
3891 if (VALIST.__args + paddedsize > VALIST.__skip
3892 && VALIST.__args <= VALIST.__skip)
3893 addr = VALIST.__skip + 32;
3895 addr = VALIST.__args;
3896 VALIST.__args = addr + paddedsize;
3897 ret = *(TYPE *)addr;
3900 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3901 gimple_seq * post_p ATTRIBUTE_UNUSED)
3903 tree f_args, f_skip;
3905 HOST_WIDE_INT size, rsize;
3906 tree paddedsize, addr, tmp;
3907 bool pass_by_reference_p;
3909 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3910 f_skip = TREE_CHAIN (f_args);
3912 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3914 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3916 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3918 addr = create_tmp_var (ptr_type_node, "va_arg");
3919 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3921 /* If an object is dynamically sized, a pointer to it is passed
3922 instead of the object itself. */
3923 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3925 if (pass_by_reference_p)
3926 type = build_pointer_type (type);
3927 size = int_size_in_bytes (type);
3928 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3930 /* build conditional expression to calculate addr. The expression
3931 will be gimplified later. */
3932 paddedsize = size_int (rsize);
3933 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
3934 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3935 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3936 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3937 unshare_expr (skip)));
3939 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3940 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3941 size_int (32)), unshare_expr (args));
3943 gimplify_assign (addr, tmp, pre_p);
3945 /* update VALIST.__args */
3946 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3947 gimplify_assign (unshare_expr (args), tmp, pre_p);
3949 addr = fold_convert (build_pointer_type (type), addr);
3951 if (pass_by_reference_p)
3952 addr = build_va_arg_indirect_ref (addr);
3954 return build_va_arg_indirect_ref (addr);
3957 /* Save parameter registers starting with the register that corresponds
3958 to the first unnamed parameter. If the first unnamed parameter is
3959 in the stack then save no registers. Set pretend_args_size to the
3960 amount of space needed to save the registers. */
3962 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3963 tree type, int *pretend_size, int no_rtl)
3972 /* cum currently points to the last named argument; we want to
3973 start at the next argument. */
3974 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3976 offset = -STACK_POINTER_OFFSET;
3977 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3979 tmp = gen_frame_mem (V4SImode,
3980 plus_constant (virtual_incoming_args_rtx,
3982 emit_move_insn (tmp,
3983 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3986 *pretend_size = offset + STACK_POINTER_OFFSET;
3991 spu_conditional_register_usage (void)
3995 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3996 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4000 /* This is called to decide when we can simplify a load instruction. We
4001 must only return true for registers which we know will always be
4002 aligned, taking into account that CSE might replace this reg with
4003 another one that has not been marked aligned.
4004 So this is really only true for frame, stack and virtual registers,
4005 which we know are always aligned and should not be adversely affected
4006 by CSE. */
4008 regno_aligned_for_load (int regno)
4010 return regno == FRAME_POINTER_REGNUM
4011 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
4012 || regno == ARG_POINTER_REGNUM
4013 || regno == STACK_POINTER_REGNUM
4014 || (regno >= FIRST_VIRTUAL_REGISTER
4015 && regno <= LAST_VIRTUAL_REGISTER);
4018 /* Return TRUE when mem is known to be 16-byte aligned. */
4020 aligned_mem_p (rtx mem)
4022 if (MEM_ALIGN (mem) >= 128)
4024 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4026 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4028 rtx p0 = XEXP (XEXP (mem, 0), 0);
4029 rtx p1 = XEXP (XEXP (mem, 0), 1);
4030 if (regno_aligned_for_load (REGNO (p0)))
4032 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4034 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4038 else if (GET_CODE (XEXP (mem, 0)) == REG)
4040 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4043 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4045 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4047 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4048 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4049 if (GET_CODE (p0) == SYMBOL_REF
4050 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4056 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4057 into its SYMBOL_REF_FLAGS. */
4059 spu_encode_section_info (tree decl, rtx rtl, int first)
4061 default_encode_section_info (decl, rtl, first);
4063 /* If a variable has a forced alignment to < 16 bytes, mark it with
4064 SYMBOL_FLAG_ALIGN1. */
4065 if (TREE_CODE (decl) == VAR_DECL
4066 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4067 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4070 /* Return TRUE if we are certain the mem refers to a complete object
4071 which is both 16-byte aligned and padded to a 16-byte boundary. This
4072 would make it safe to store with a single instruction.
4073 We guarantee the alignment and padding for static objects by aligning
4074 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4075 FIXME: We currently cannot guarantee this for objects on the stack
4076 because assign_parm_setup_stack calls assign_stack_local with the
4077 alignment of the parameter mode and in that case the alignment never
4078 gets adjusted by LOCAL_ALIGNMENT. */
4080 store_with_one_insn_p (rtx mem)
4082 rtx addr = XEXP (mem, 0);
4083 if (GET_MODE (mem) == BLKmode)
4085 /* Only static objects. */
4086 if (GET_CODE (addr) == SYMBOL_REF)
4088 /* We use the associated declaration to make sure the access is
4089 referring to the whole object.
4090 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4091 if it is necessary. Will there be cases where one exists, and
4092 the other does not? Will there be cases where both exist, but
4093 have different types? */
4094 tree decl = MEM_EXPR (mem);
4096 && TREE_CODE (decl) == VAR_DECL
4097 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4099 decl = SYMBOL_REF_DECL (addr);
4101 && TREE_CODE (decl) == VAR_DECL
4102 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4109 spu_expand_mov (rtx * ops, enum machine_mode mode)
4111 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4114 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4116 rtx from = SUBREG_REG (ops[1]);
4117 enum machine_mode imode = GET_MODE (from);
4119 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4120 && GET_MODE_CLASS (imode) == MODE_INT
4121 && subreg_lowpart_p (ops[1]));
4123 if (GET_MODE_SIZE (imode) < 4)
4125 from = gen_rtx_SUBREG (SImode, from, 0);
4129 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4131 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4132 emit_insn (GEN_FCN (icode) (ops[0], from));
4135 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4139 /* At least one of the operands needs to be a register. */
4140 if ((reload_in_progress | reload_completed) == 0
4141 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4143 rtx temp = force_reg (mode, ops[1]);
4144 emit_move_insn (ops[0], temp);
4147 if (reload_in_progress || reload_completed)
4149 if (CONSTANT_P (ops[1]))
4150 return spu_split_immediate (ops);
4155 if (GET_CODE (ops[0]) == MEM)
4157 if (!spu_valid_move (ops))
4159 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4160 gen_reg_rtx (TImode)));
4164 else if (GET_CODE (ops[1]) == MEM)
4166 if (!spu_valid_move (ops))
4169 (ops[0], ops[1], gen_reg_rtx (TImode),
4170 gen_reg_rtx (SImode)));
4174 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4175 extend them. */
4176 if (GET_CODE (ops[1]) == CONST_INT)
4178 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4179 if (val != INTVAL (ops[1]))
4181 emit_move_insn (ops[0], GEN_INT (val));
4190 spu_split_load (rtx * ops)
4192 enum machine_mode mode = GET_MODE (ops[0]);
4193 rtx addr, load, rot, mem, p0, p1;
4196 addr = XEXP (ops[1], 0);
4200 if (GET_CODE (addr) == PLUS)
4203 aligned reg + aligned reg => lqx
4204 aligned reg + unaligned reg => lqx, rotqby
4205 aligned reg + aligned const => lqd
4206 aligned reg + unaligned const => lqd, rotqbyi
4207 unaligned reg + aligned reg => lqx, rotqby
4208 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4209 unaligned reg + aligned const => lqd, rotqby
4210 unaligned reg + unaligned const -> not allowed by legitimate address
4212 p0 = XEXP (addr, 0);
4213 p1 = XEXP (addr, 1);
4214 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
4216 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4218 emit_insn (gen_addsi3 (ops[3], p0, p1));
4226 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4228 rot_amt = INTVAL (p1) & 15;
4229 p1 = GEN_INT (INTVAL (p1) & -16);
4230 addr = gen_rtx_PLUS (SImode, p0, p1);
4232 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
4236 else if (GET_CODE (addr) == REG)
4238 if (!regno_aligned_for_load (REGNO (addr)))
4241 else if (GET_CODE (addr) == CONST)
4243 if (GET_CODE (XEXP (addr, 0)) == PLUS
4244 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4245 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4247 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4249 addr = gen_rtx_CONST (Pmode,
4250 gen_rtx_PLUS (Pmode,
4251 XEXP (XEXP (addr, 0), 0),
4252 GEN_INT (rot_amt & -16)));
4254 addr = XEXP (XEXP (addr, 0), 0);
4259 else if (GET_CODE (addr) == CONST_INT)
4261 rot_amt = INTVAL (addr);
4262 addr = GEN_INT (rot_amt & -16);
4264 else if (!ALIGNED_SYMBOL_REF_P (addr))
4267 if (GET_MODE_SIZE (mode) < 4)
4268 rot_amt += GET_MODE_SIZE (mode) - 4;
4274 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4281 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4282 mem = change_address (ops[1], TImode, addr);
4284 emit_insn (gen_movti (load, mem));
4287 emit_insn (gen_rotqby_ti (load, load, rot));
4289 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4291 if (reload_completed)
4292 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4294 emit_insn (gen_spu_convert (ops[0], load));
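/* The net effect for an SImode load at an address with low bits of 4
   is roughly (added illustrative assembly):

     lqd     $2,0($3)      ; load the containing aligned quadword
     rotqby  $2,$2,$3      ; rotate the requested word into the
                           ; preferred slot (bytes 0-3)  */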
4298 spu_split_store (rtx * ops)
4300 enum machine_mode mode = GET_MODE (ops[0]);
4303 rtx addr, p0, p1, p1_lo, smem;
4307 addr = XEXP (ops[0], 0);
4309 if (GET_CODE (addr) == PLUS)
4312 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4313 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4314 aligned reg + aligned const => lqd, c?d, shuf, stqx
4315 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4316 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4317 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4318 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4319 unaligned reg + unaligned const -> not allowed by legitimate address
4322 p0 = XEXP (addr, 0);
4323 p1 = p1_lo = XEXP (addr, 1);
4324 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4326 p1_lo = GEN_INT (INTVAL (p1) & 15);
4327 p1 = GEN_INT (INTVAL (p1) & -16);
4328 addr = gen_rtx_PLUS (SImode, p0, p1);
4331 else if (GET_CODE (addr) == REG)
4335 p1 = p1_lo = const0_rtx;
4340 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4341 p1 = 0; /* aform doesn't use p1 */
4343 if (ALIGNED_SYMBOL_REF_P (addr))
4345 else if (GET_CODE (addr) == CONST)
4347 if (GET_CODE (XEXP (addr, 0)) == PLUS
4348 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4349 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4351 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4353 addr = gen_rtx_CONST (Pmode,
4354 gen_rtx_PLUS (Pmode,
4355 XEXP (XEXP (addr, 0), 0),
4356 GEN_INT (v & -16)));
4358 addr = XEXP (XEXP (addr, 0), 0);
4359 p1_lo = GEN_INT (v & 15);
4362 else if (GET_CODE (addr) == CONST_INT)
4364 p1_lo = GEN_INT (INTVAL (addr) & 15);
4365 addr = GEN_INT (INTVAL (addr) & -16);
4369 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4371 scalar = store_with_one_insn_p (ops[0]);
4374 /* We could copy the flags from the ops[0] MEM to mem here.
4375 We don't because we want this load to be optimized away if
4376 possible, and copying the flags will prevent that in certain
4377 cases, e.g. consider the volatile flag. */
4379 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4380 set_mem_alias_set (lmem, 0);
4381 emit_insn (gen_movti (reg, lmem));
4383 if (!p0 || regno_aligned_for_load (REGNO (p0)))
4384 p0 = stack_pointer_rtx;
4388 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4389 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4391 else if (reload_completed)
4393 if (GET_CODE (ops[1]) == REG)
4394 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4395 else if (GET_CODE (ops[1]) == SUBREG)
4396 emit_move_insn (reg,
4397 gen_rtx_REG (GET_MODE (reg),
4398 REGNO (SUBREG_REG (ops[1]))));
4404 if (GET_CODE (ops[1]) == REG)
4405 emit_insn (gen_spu_convert (reg, ops[1]));
4406 else if (GET_CODE (ops[1]) == SUBREG)
4407 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4412 if (GET_MODE_SIZE (mode) < 4 && scalar)
4413 emit_insn (gen_shlqby_ti
4414 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4416 smem = change_address (ops[0], TImode, addr);
4417 /* We can't use the previous alias set because the memory has changed
4418 size and can potentially overlap objects of other types. */
4419 set_mem_alias_set (smem, 0);
4421 emit_insn (gen_movti (smem, reg));
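/* For a non-scalar store this is a read-modify-write of the containing
   quadword, roughly (added illustrative assembly for an SImode store):

     lqd     $5,0($3)      ; load the old quadword
     cwd     $6,0($3)      ; generate the insertion mask
     shufb   $5,$4,$5,$6   ; merge the new word into the old data
     stqd    $5,0($3)      ; store the quadword back  */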
4424 /* Return TRUE if X is MEM which is a struct member reference
4425 and the member can safely be loaded and stored with a single
4426 instruction because it is padded. */
4428 mem_is_padded_component_ref (rtx x)
4430 tree t = MEM_EXPR (x);
4432 if (!t || TREE_CODE (t) != COMPONENT_REF)
4434 t = TREE_OPERAND (t, 1);
4435 if (!t || TREE_CODE (t) != FIELD_DECL
4436 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4438 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4439 r = DECL_FIELD_CONTEXT (t);
4440 if (!r || TREE_CODE (r) != RECORD_TYPE)
4442 /* Make sure they are the same mode. */
4443 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4445 /* If there are no following fields then the field alignment assures
4446 the structure is padded to the alignment which means this field is
4447 padded too. */
4448 if (TREE_CHAIN (t) == 0)
4450 /* If the following field is also aligned then this field will be
4451 aligned. */
4452 t = TREE_CHAIN (t);
4453 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4458 /* Parse the -mfixed-range= option string. */
4460 fix_range (const char *const_str)
4463 char *str, *dash, *comma;
4465 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4466 REG2 are either register names or register numbers. The effect
4467 of this option is to mark the registers in the range from REG1 to
4468 REG2 as ``fixed'' so they won't be used by the compiler. */
4470 i = strlen (const_str);
4471 str = (char *) alloca (i + 1);
4472 memcpy (str, const_str, i + 1);
4476 dash = strchr (str, '-');
4479 warning (0, "value of -mfixed-range must have form REG1-REG2");
4483 comma = strchr (dash + 1, ',');
4487 first = decode_reg_name (str);
4490 warning (0, "unknown register name: %s", str);
4494 last = decode_reg_name (dash + 1);
4497 warning (0, "unknown register name: %s", dash + 1);
4505 warning (0, "%s-%s is an empty range", str, dash + 1);
4509 for (i = first; i <= last; ++i)
4510 fixed_regs[i] = call_used_regs[i] = 1;
4521 spu_valid_move (rtx * ops)
4523 enum machine_mode mode = GET_MODE (ops[0]);
4524 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4527 /* init_expr_once tries to recog against load and store insns to set
4528 the direct_load[] and direct_store[] arrays. We always want to
4529 consider those loads and stores valid. init_expr_once is called in
4530 the context of a dummy function which does not have a decl. */
4531 if (cfun->decl == 0)
4534 /* Don't allow loads/stores which would require more than 1 insn.
4535 During and after reload we assume loads and stores only take 1
4536 insn. */
4537 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4539 if (GET_CODE (ops[0]) == MEM
4540 && (GET_MODE_SIZE (mode) < 4
4541 || !(store_with_one_insn_p (ops[0])
4542 || mem_is_padded_component_ref (ops[0]))))
4544 if (GET_CODE (ops[1]) == MEM
4545 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4551 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4552 can be generated using the fsmbi instruction. */
4554 fsmbi_const_p (rtx x)
4558 /* We can always choose TImode for CONST_INT because the high bits
4559 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4560 enum immediate_class c = classify_immediate (x, TImode);
4561 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
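/* Added note: fsmbi expands each of the 16 immediate bits into a byte
   of 0x00 or 0xff, so, e.g., "fsmbi $2,0xff00" sets the high 8 bytes
   of $2 to 0xff and the low 8 bytes to 0. IC_FSMBI2 covers constants
   that need one additional insn after the fsmbi.  */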
4566 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4567 can be generated using the cbd, chd, cwd or cdd instruction. */
4569 cpat_const_p (rtx x, enum machine_mode mode)
4573 enum immediate_class c = classify_immediate (x, mode);
4574 return c == IC_CPAT;
4580 gen_cpat_const (rtx * ops)
4582 unsigned char dst[16];
4583 int i, offset, shift, isize;
4584 if (GET_CODE (ops[3]) != CONST_INT
4585 || GET_CODE (ops[2]) != CONST_INT
4586 || (GET_CODE (ops[1]) != CONST_INT
4587 && GET_CODE (ops[1]) != REG))
4589 if (GET_CODE (ops[1]) == REG
4590 && (!REG_POINTER (ops[1])
4591 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4594 for (i = 0; i < 16; i++)
4596 isize = INTVAL (ops[3]);
4599 else if (isize == 2)
4603 offset = (INTVAL (ops[2]) +
4604 (GET_CODE (ops[1]) ==
4605 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4606 for (i = 0; i < isize; i++)
4607 dst[offset + i] = i + shift;
4608 return array_to_constant (TImode, dst);
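/* Illustrative result (added commentary, assuming the elided
   initialization fills DST with 16..31): for a 4 byte insert
   (ops[3] == 4) at offset 4, the returned TImode constant has the
   byte image

     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f

   i.e., shufb selectors taking bytes 4-7 of the result from the new
   value and everything else from the old memory contents.  */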
4611 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4612 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4613 than 16 bytes, the value is repeated across the rest of the array. */
4615 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4620 memset (arr, 0, 16);
4621 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4622 if (GET_CODE (x) == CONST_INT
4623 || (GET_CODE (x) == CONST_DOUBLE
4624 && (mode == SFmode || mode == DFmode)))
4626 gcc_assert (mode != VOIDmode && mode != BLKmode);
4628 if (GET_CODE (x) == CONST_DOUBLE)
4629 val = const_double_to_hwint (x);
4632 first = GET_MODE_SIZE (mode) - 1;
4633 for (i = first; i >= 0; i--)
4635 arr[i] = val & 0xff;
4638 /* Splat the constant across the whole array. */
4639 for (j = 0, i = first + 1; i < 16; i++)
4642 j = (j == first) ? 0 : j + 1;
4645 else if (GET_CODE (x) == CONST_DOUBLE)
4647 val = CONST_DOUBLE_LOW (x);
4648 for (i = 15; i >= 8; i--)
4650 arr[i] = val & 0xff;
4653 val = CONST_DOUBLE_HIGH (x);
4654 for (i = 7; i >= 0; i--)
4656 arr[i] = val & 0xff;
4660 else if (GET_CODE (x) == CONST_VECTOR)
4664 mode = GET_MODE_INNER (mode);
4665 units = CONST_VECTOR_NUNITS (x);
4666 for (i = 0; i < units; i++)
4668 elt = CONST_VECTOR_ELT (x, i);
4669 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4671 if (GET_CODE (elt) == CONST_DOUBLE)
4672 val = const_double_to_hwint (elt);
4675 first = GET_MODE_SIZE (mode) - 1;
4676 if (first + i * GET_MODE_SIZE (mode) > 16)
4678 for (j = first; j >= 0; j--)
4680 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
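/* Splat example (added note): for a QImode constant 0xab the array
   becomes sixteen 0xab bytes; for an HImode 0x1234 the byte pair
   0x12,0x34 repeats eight times.  */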
4690 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4691 smaller than 16 bytes, use the bytes that would represent that value
4692 in a register, e.g., for QImode return the value of arr[3]. */
4694 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4696 enum machine_mode inner_mode;
4698 int units, size, i, j, k;
4701 if (GET_MODE_CLASS (mode) == MODE_INT
4702 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4704 j = GET_MODE_SIZE (mode);
4705 i = j < 4 ? 4 - j : 0;
4706 for (val = 0; i < j; i++)
4707 val = (val << 8) | arr[i];
4708 val = trunc_int_for_mode (val, mode);
4709 return GEN_INT (val);
4715 for (i = high = 0; i < 8; i++)
4716 high = (high << 8) | arr[i];
4717 for (i = 8, val = 0; i < 16; i++)
4718 val = (val << 8) | arr[i];
4719 return immed_double_const (val, high, TImode);
4723 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4724 val = trunc_int_for_mode (val, SImode);
4725 return hwint_to_const_double (SFmode, val);
4729 for (i = 0, val = 0; i < 8; i++)
4730 val = (val << 8) | arr[i];
4731 return hwint_to_const_double (DFmode, val);
4734 if (!VECTOR_MODE_P (mode))
4737 units = GET_MODE_NUNITS (mode);
4738 size = GET_MODE_UNIT_SIZE (mode);
4739 inner_mode = GET_MODE_INNER (mode);
4740 v = rtvec_alloc (units);
4742 for (k = i = 0; i < units; ++i)
4745 for (j = 0; j < size; j++, k++)
4746 val = (val << 8) | arr[k];
4748 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4749 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4751 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4756 return gen_rtx_CONST_VECTOR (mode, v);
static void
reloc_diagnostic (rtx x)
{
  tree loc_decl, decl = 0;
  const char *msg;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL. */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* We use last_assemble_variable_decl to get line information.  It's
     not always going to be right and might not even be close, but will
     be right for the more common cases. */
  if (!last_assemble_variable_decl || in_section == ctors_section)
    loc_decl = decl;
  else
    loc_decl = last_assemble_variable_decl;

  /* The decl could be a string constant.  */
  if (decl && DECL_P (decl))
    msg = "%Jcreating run-time relocation for %qD";
  else
    msg = "creating run-time relocation";

  if (TARGET_WARN_RELOC)
    warning (0, msg, loc_decl, decl);
  else
    error (msg, loc_decl, decl);
}
/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them. */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't.  */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}
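
/* Illustrative note, not from the original source: under -fpic, a
   statically initialized pointer is the typical way to reach the
   diagnostic above, e.g.

     static int x;
     int *p = &x;

   since the initializer for p would need a run-time relocation, which
   the SPU ABI disallows.  */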
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
	       bool speed ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
   */

  /* Use defaults for float operations.  Not accurate but good enough. */
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
	*total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
	{
	  *total = COSTS_N_INSNS (9);
	  return true;
	}
      break;

    case MULT:
      cost =
	GET_CODE (XEXP (x, 0)) ==
	REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	      cost = COSTS_N_INSNS (14);
	      if ((val & 0xffff) == 0)
		cost = COSTS_N_INSNS (9);
	      else if (val > 0 && val < 0x10000)
		cost = COSTS_N_INSNS (11);
	    }
	}
      *total = cost;
      return true;
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;
    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;
    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
	*total = COSTS_N_INSNS (0);
      else
	*total = COSTS_N_INSNS (4);
      return true;
    default:
      break;
    }
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
  *total = cost;
  return true;
}
static enum machine_mode
spu_unwind_word_mode (void)
{
  return SImode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.) */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again. */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
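
/* Illustrative note, not from the original source: the shuffle pattern
   built from 0x1020300010203ll selects bytes 0-3 of op1 for every word
   slot, so one subv4si updates both the stack pointer (slot 0) and the
   Available Stack Size (slot 1) by the allocation size, e.g.

     sp         = { 0x3fff0, 0x3fe00, 0, 0 }
     splatted   = {    0x50,    0x50, 0x50, 0x50 }
     sp - splat = { 0x3ffa0, 0x3fdb0, ... }  */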
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want. */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
	  && ((type) == 0
	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	      || int_size_in_bytes (type) >
	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
/* Create the built-in types and functions */
struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};
static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
			(void_type_node,
			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared. */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];		/* build_function will make a copy. */
      int parm;

      if (d->name == 0)
	continue;

      /* Find last parm.  */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
	;

      p = void_list_node;
      while (parm > 1)
	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      d->fndecl =
	add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
			      NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
	TREE_READONLY (d->fndecl) = 1;

      /* These builtins don't throw.  */
      TREE_NOTHROW (d->fndecl) = 1;
    }
}
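
/* Illustrative note, not from the original source: for a table entry
   named "si_ai" whose parm list is {SPU_BTI_QUADWORD, SPU_BTI_QUADWORD,
   SPU_BTI_S10} (the return type comes first), the loop above conses the
   parameter types in reverse onto void_list_node and registers the
   equivalent of

     qword __builtin_si_ai (qword, int);

   with add_builtin_function.  */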
void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };

  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}
int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
	  && GET_CODE (ops[1]) != SUBREG)
	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
	{
	case V2DImode:
	case V2DFmode:
	  shuf =
	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
				TImode);
	  break;
	case V4SImode:
	case V4SFmode:
	  shuf =
	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
				TImode);
	  break;
	case V8HImode:
	  shuf =
	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
				TImode);
	  break;
	case V16QImode:
	  shuf =
	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
				TImode);
	  break;
	default:
	  abort ();
	}
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
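
/* Illustrative note, not from the original source: for V4SImode the
   shuffle control 0x00010203 repeated in every word slot makes shufb
   copy bytes 0-3 (the preferred slot) of the source into all four
   slots, e.g.

     rtx ops[2] = { v4si_reg, si_reg };
     spu_builtin_splats (ops);	(v4si_reg = { x, x, x, x })  */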
void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
	  break;
	case V4SFmode:
	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
	  break;
	case V4SImode:
	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
	  break;
	case V2DImode:
	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
	  break;
	case V2DFmode:
	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
	  break;
	default:
	  abort ();
	}
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
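
/* Illustrative note, not from the original source: for a variable
   V8HImode index i the code above computes the byte rotate count
   2*i - 2, which rotates element i (at bytes 2i..2i+1) into the HImode
   preferred slot (bytes 2-3) before gen_spu_convert extracts it; e.g.
   i == 5 gives a left rotate of 8 bytes.  */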
void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
	     (mask, stack_pointer_rtx, offset,
	      GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
	pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
	  emit_insn (gen_addsi3 (offset, offset, offset));
	  break;
	case V4SFmode:
	case V4SImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
	  break;
	case V2DImode:
	case V2DFmode:
	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
	  break;
	default:
	  abort ();
	}
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
	2, 3, 0, 1, 18, 19, 16, 17,
	0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
	0x41, 0, 0, 79,
	0x41, 0, 0, STATIC_CHAIN_REGNUM,
	0x60, 0x80, 0, 79,
	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };
      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
	0x42, 0, 0, STATIC_CHAIN_REGNUM,
	0x30, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
	 fits 18 bits and the last 4 are zeros.  This will be true if
	 the stack pointer is initialized to 0x3fff0 at program start,
	 otherwise the ila instruction will be garbage. */
      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
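
/* Illustrative note, not from the original source: in the non-large-mem
   case the quadword assembled above amounts to roughly

     ila  $STATIC_CHAIN_REGNUM, cxt
     bra  fnaddr

   with the immediates or'd into the opcode words (cxt shifted into the
   I18 field by 7, fnaddr into the I16 branch field by 5).  */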
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
	arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
	arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
	{
	case HImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_extendhisi2 (sign, ops[1]));
	  arr[last] = 0x03;
	  arr[last - 1] = 0x02;
	  break;
	case SImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
	  for (i = 0; i < 4; i++)
	    arr[last - i] = 3 - i;
	  break;
	case DImode:
	  sign = gen_reg_rtx (SImode);
	  c = gen_reg_rtx (SImode);
	  emit_insn (gen_spu_convert (c, ops[1]));
	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
	  for (i = 0; i < 8; i++)
	    arr[last - i] = 7 - i;
	  break;
	default:
	  abort ();
	}
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
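
/* Illustrative note, not from the original source: sign-extending a
   SImode value to DImode builds the shuffle control whose first eight
   bytes are

     { 0x10, 0x10, 0x10, 0x10, 0x00, 0x01, 0x02, 0x03 }

   where 0x10 selects the all-sign-bits byte of SIGN and 0x00-0x03 copy
   the original word, so a single shufb produces the extended value.  */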
/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	++n_var;
      else if (first_constant == NULL_RTX)
	first_constant = x;

      if (i > 0 && !rtx_equal_p (x, first))
	all_same = false;
    }

  /* If all elements are the same, use splats to repeat elements.  */
  if (all_same)
    {
      if (!CONSTANT_P (first)
	  && !register_operand (first, GET_MODE (x)))
	first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* Load constant parts.  */
  if (n_var != n_elts)
    {
      if (n_var == 0)
	{
	  emit_move_insn (target,
			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
	}
      else
	{
	  rtx constant_parts_rtx = copy_rtx (vals);

	  gcc_assert (first_constant != NULL_RTX);
	  /* Fill empty slots with the first constant, this increases
	     our chance of using splats in the recursive call below.  */
	  for (i = 0; i < n_elts; ++i)
	    {
	      x = XVECEXP (constant_parts_rtx, 0, i);
	      if (!(CONST_INT_P (x)
		    || GET_CODE (x) == CONST_DOUBLE
		    || GET_CODE (x) == CONST_FIXED))
		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
	    }

	  spu_expand_vector_init (target, constant_parts_rtx);
	}
    }

  /* Load variable parts.  */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
	{
	  x = XVECEXP (vals, 0, i);
	  if (!(CONST_INT_P (x)
		|| GET_CODE (x) == CONST_DOUBLE
		|| GET_CODE (x) == CONST_FIXED))
	    {
	      if (!register_operand (x, GET_MODE (x)))
		x = force_reg (GET_MODE (x), x);
	      insert_operands[1] = x;
	      insert_operands[3] = GEN_INT (i);
	      spu_builtin_insert (insert_operands);
	    }
	}
    }
}
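
/* Illustrative note, not from the original source: initializing
   {x, 1, 2, 3} with variable x first recurses with {1, 1, 2, 3} (the
   hole filled by the first constant, so a splat may be usable), then
   inserts x into element 0 via spu_builtin_insert.  */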
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
		  enum machine_mode dest_mode,
		  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
			 rtx op0, rtx op1,
			 enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions use destination V4SImode.
     Double floating point vector compare instructions use destination
     V2DImode.  Move the destination to the appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
	{
	case LT:
	  rcode = GT;
	  swap_operands = true;
	  try_again = true;
	  break;
	case LTU:
	  rcode = GTU;
	  swap_operands = true;
	  try_again = true;
	  break;
	case NE:
	  /* Treat A != B as ~(A==B).  */
	  {
	    enum insn_code nor_code;
	    rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
	    nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
	    gcc_assert (nor_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	case GE:
	case GEU:
	case LE:
	case LEU:
	  /* Try GT/GTU/LT/LTU OR EQ */
	  {
	    rtx c_rtx, eq_rtx;
	    enum insn_code ior_code;
	    enum rtx_code new_code;

	    switch (rcode)
	      {
	      case GE:  new_code = GT;  break;
	      case GEU: new_code = GTU; break;
	      case LE:  new_code = LT;  break;
	      case LEU: new_code = LTU; break;
	      default:
		gcc_unreachable ();
	      }

	    c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
	    eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	default:
	  gcc_unreachable ();
	}

      /* You only get two chances.  */
      if (try_again)
	vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	}
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}
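
/* Illustrative note, not from the original source: GE has no direct SPU
   compare pattern, so the fallback above decomposes it, roughly

     mask = (cgt a, b) | (ceq a, b)

   while LT and LTU are handled by retrying GT/GTU with the operands
   swapped.  */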
/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
static rtx
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			   rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return dest;
}
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
	  || GET_MODE (op) == BLKmode)
	return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
	return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands. */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
	error ("%s expects an integer literal in the range [%d, %d].",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
	  && (GET_CODE (XEXP (op, 0)) == PLUS
	      || GET_CODE (XEXP (op, 0)) == MINUS))
	{
	  v = INTVAL (XEXP (XEXP (op, 0), 1));
	  op = XEXP (XEXP (op, 0), 0);
	}
      else if (GET_CODE (op) == CONST_INT)
	v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
	v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range. */
      if (v < spu_builtin_range[range].low
	  || v > spu_builtin_range[range].high)
	error ("%s expects an integer literal in the range [%d, %d]. ("
	       HOST_WIDE_INT_PRINT_DEC ")",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high,
	       v);

      switch (p)
	{
	case SPU_BTI_S10_4:
	  lsbits = 4;
	  break;
	case SPU_BTI_U16_2:
	  /* This is only used in lqa, and stqa.  Even though the insns
	     encode 16 bits of the address (all but the 2 least
	     significant), only 14 bits are used because it is masked to
	     be 16 byte aligned. */
	  lsbits = 4;
	  break;
	case SPU_BTI_S16_2:
	  /* This is used for lqr and stqr. */
	  lsbits = 2;
	  break;
	default:
	  lsbits = 0;
	  break;
	}

      if (GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == SYMBOL_REF
	      && SYMBOL_REF_FUNCTION_P (op))
	  || (v & ((1 << lsbits) - 1)) != 0)
	warning (0, "%d least significant bits of %s are ignored.", lsbits,
		 d->name);
    }
}
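
/* Illustrative note, not from the original source: passing an
   out-of-range immediate, e.g.

     si_ai (a, 600);	(SPU_BTI_S10 allows only [-512, 511])

   reaches the range error above, while a misaligned lqa/stqa address
   trips the least-significant-bits warning.  */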
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
		     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl. */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      gcc_assert (arg != 0);
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
    }

  /* The insn pattern may have additional operands (SCRATCH).
     Return the number of actual non-SCRATCH operands.  */
  gcc_assert (i <= insn_data[icode].n_operands);
  return i;
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  int n_operands;
  tree return_type;

  /* Set up ops[] with values from arglist. */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0. */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand otherwise
	 use the mode from the builtin function prototype. */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
	 and when we are using all of the registers extra copies leads
	 to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
			      gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects. */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands. */
  for (p = 1; i < n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels */

      /* For specific intrinsics with an immediate operand, e.g.,
	 si_ai(), we sometimes need to convert the scalar argument to a
	 vector argument by splatting the scalar. */
      if (VECTOR_MODE_P (mode)
	  && (GET_CODE (ops[i]) == CONST_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
	{
	  if (GET_CODE (ops[i]) == CONST_INT)
	    ops[i] = spu_const (mode, INTVAL (ops[i]));
	  else
	    {
	      rtx reg = gen_reg_rtx (mode);
	      enum machine_mode imode = GET_MODE_INNER (mode);
	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
	      if (imode != GET_MODE (ops[i]))
		ops[i] = convert_to_mode (imode, ops[i],
					  TYPE_UNSIGNED (spu_builtin_types
							 [d->parm[i]]));
	      emit_insn (gen_spu_splats (reg, ops[i]));
	      ops[i] = reg;
	    }
	}

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
	ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
	 the builtin function prototype. */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
rtx
spu_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}
/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULE_0].fndecl;
      else
	return spu_builtins[SPU_MULE_1].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULO_1].fndecl;
      else
	return spu_builtins[SPU_MULO_0].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}
/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed. */
  if (runtime_test)
    return -19;
  else
    return 0;
}
/* Return true iff the data reference of TYPE can reach vector alignment (16)
   after applying N number of iterations.  This routine does not determine
   how many iterations are required to reach desired alignment.  */
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}
/* Implement targetm.vectorize.builtin_vec_perm.  */
static tree
spu_builtin_vec_perm (tree type, tree *mask_element_type)
{
  struct spu_builtin_description *d;

  *mask_element_type = unsigned_char_type_node;

  switch (TYPE_MODE (type))
    {
    case V16QImode:
      if (TYPE_UNSIGNED (type))
	d = &spu_builtins[SPU_SHUFFLE_0];
      else
	d = &spu_builtins[SPU_SHUFFLE_1];
      break;

    case V8HImode:
      if (TYPE_UNSIGNED (type))
	d = &spu_builtins[SPU_SHUFFLE_2];
      else
	d = &spu_builtins[SPU_SHUFFLE_3];
      break;

    case V4SImode:
      if (TYPE_UNSIGNED (type))
	d = &spu_builtins[SPU_SHUFFLE_4];
      else
	d = &spu_builtins[SPU_SHUFFLE_5];
      break;

    case V2DImode:
      if (TYPE_UNSIGNED (type))
	d = &spu_builtins[SPU_SHUFFLE_6];
      else
	d = &spu_builtins[SPU_SHUFFLE_7];
      break;

    case V4SFmode:
      d = &spu_builtins[SPU_SHUFFLE_8];
      break;

    case V2DFmode:
      d = &spu_builtins[SPU_SHUFFLE_9];
      break;

    default:
      return NULL_TREE;
    }

  gcc_assert (d);
  return d->fndecl;
}
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
   -2 are instructions that can go in pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
	fprintf (dump_file, "i%d %s %d %d\n",
		 INSN_UID (insn),
		 insn_data[INSN_CODE(insn)].name,
		 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
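
/* Worked example, not from the original source: with t = {2, 0, 3, 1}
   (two dual-pipe insns, three pipe0, one pipe1) the result is

     MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3

   i.e. the dual-pipe instructions are assumed to split evenly between
   the two pipes.  */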
void
spu_init_expanders (void)
{
  /* HARD_FRAME_REGISTER is only 128 bit aligned when
   * frame_pointer_needed is true.  We don't know that until we're
   * expanding the prologue. */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}
static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for shift counts.  */
  return SImode;
}
/* An early place to adjust some flags after GCC has finished processing
   them.  */
static void
asm_file_start (void)
{
  /* Variable tracking should be run after all optimizations which
     change order of insns.  It also needs a valid CFG. */
  spu_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
}

/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  return default_section_type_flags (decl, name, reloc);
}