1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
36 #include "basic-block.h"
37 #include "integrate.h"
43 #include "target-def.h"
44 #include "langhooks.h"
46 #include "cfglayout.h"
47 #include "sched-int.h"
52 #include "tm-constrs.h"
58 /* Builtin types, data and prototypes. */
60 enum spu_builtin_type_index
62 SPU_BTI_END_OF_PARAMS,
64 /* We create new type nodes for these. */
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
79 /* These all correspond to intSI_type_node */
93 /* These correspond to the standard types */
113 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126 struct spu_builtin_range
131 static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
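/* A note on the less obvious entries above (informal, derived from the SPU
   instruction formats): SPU_BTI_S10_4 spans -0x2000..0x1fff because its
   signed 10-bit field is scaled by 16 when used as a quadword offset, and
   SPU_BTI_16 spans -0x8000..0xffff because a 16-bit immediate may be read
   as either signed or unsigned depending on the instruction.  */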
147 /* Target specific attribute specifications. */
148 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150 /* Prototypes and external defs. */
151 static void spu_init_builtins (void);
152 static tree spu_builtin_decl (unsigned, bool);
153 static bool spu_scalar_mode_supported_p (enum machine_mode mode);
154 static bool spu_vector_mode_supported_p (enum machine_mode mode);
155 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
156 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
158 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
159 static rtx get_pic_reg (void);
160 static int need_to_save_reg (int regno, int saving);
161 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
162 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
163 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
165 static void emit_nop_for_insn (rtx insn);
166 static bool insn_clobbers_hbr (rtx insn);
167 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
168 int distance, sbitmap blocks);
169 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
170 enum machine_mode dmode);
171 static rtx get_branch_target (rtx branch);
172 static void spu_machine_dependent_reorg (void);
173 static int spu_sched_issue_rate (void);
174 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
176 static int get_pipe (rtx insn);
177 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
178 static void spu_sched_init_global (FILE *, int, int);
179 static void spu_sched_init (FILE *, int, int);
180 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
181 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 static int spu_naked_function_p (tree func);
188 static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
189 const_tree type, bool named);
190 static tree spu_build_builtin_va_list (void);
191 static void spu_va_start (tree, rtx);
192 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
193 gimple_seq * pre_p, gimple_seq * post_p);
194 static int store_with_one_insn_p (rtx mem);
195 static int mem_is_padded_component_ref (rtx x);
196 static int reg_aligned_for_addr (rtx x);
197 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
198 static void spu_asm_globalize_label (FILE * file, const char *name);
199 static bool spu_rtx_costs (rtx x, int code, int outer_code,
200 int *total, bool speed);
201 static bool spu_function_ok_for_sibcall (tree decl, tree exp);
202 static void spu_init_libfuncs (void);
203 static bool spu_return_in_memory (const_tree type, const_tree fntype);
204 static void fix_range (const char *);
205 static void spu_encode_section_info (tree, rtx, int);
206 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
207 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
209 static tree spu_builtin_mul_widen_even (tree);
210 static tree spu_builtin_mul_widen_odd (tree);
211 static tree spu_builtin_mask_for_load (void);
212 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
213 static bool spu_vector_alignment_reachable (const_tree, bool);
214 static tree spu_builtin_vec_perm (tree, tree *);
215 static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
216 static enum machine_mode spu_addr_space_address_mode (addr_space_t);
217 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
218 static rtx spu_addr_space_convert (rtx, tree, tree);
219 static int spu_sms_res_mii (struct ddg *g);
220 static void asm_file_start (void);
221 static unsigned int spu_section_type_flags (tree, const char *, int);
222 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
223 static void spu_unique_section (tree, int);
224 static rtx spu_expand_load (rtx, rtx, rtx, int);
225 static void spu_trampoline_init (rtx, tree, rtx);
227 extern const char *reg_names[];
229 /* Which instruction set architecture to use. */
231 /* Which cpu are we tuning for. */
234 /* The hardware requires 8 insns between a hint and the branch it
235 affects. This variable describes how many rtl instructions the
236 compiler needs to see before inserting a hint, and then the compiler
237 will insert enough nops to make it at least 8 insns. The default is
238 for the compiler to allow up to 2 nops to be emitted. The nops are
239 inserted in pairs, so we round down. */
240 int spu_hint_dist = (8*4) - (2*4);
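/* Each SPU instruction is 4 bytes, so 8 insns is 32 bytes; allowing 2 nops
   means the compiler only has to see 24 bytes (6 insns) of real instructions
   between the hint and the branch, and fills the rest with nops.  */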
242 /* Determines whether we run variable tracking in machine dependent
244 static int spu_flag_var_tracking;
259 IC_POOL, /* constant pool */
260 IC_IL1, /* one il* instruction */
261 IC_IL2, /* both ilhu and iohl instructions */
262 IC_IL1s, /* one il* instruction */
263 IC_IL2s, /* both ilhu and iohl instructions */
264 IC_FSMBI, /* the fsmbi instruction */
265 IC_CPAT, /* one of the c*d instructions */
266 IC_FSMBI2 /* fsmbi plus 1 other instruction */
269 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
270 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
271 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
272 static enum immediate_class classify_immediate (rtx op,
273 enum machine_mode mode);
275 static enum machine_mode spu_unwind_word_mode (void);
277 static enum machine_mode
278 spu_libgcc_cmp_return_mode (void);
280 static enum machine_mode
281 spu_libgcc_shift_count_mode (void);
283 /* Pointer mode for __ea references. */
284 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
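/* In other words, __ea pointers are DImode under the 64-bit __ea model
   (-mea64) and SImode under the 32-bit model (-mea32).  */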
287 /* Table of machine attributes. */
288 static const struct attribute_spec spu_attribute_table[] =
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
292 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
293 { NULL, 0, 0, false, false, false, NULL }
296 /* TARGET overrides. */
298 #undef TARGET_ADDR_SPACE_POINTER_MODE
299 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
301 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
302 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
304 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
305 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
306 spu_addr_space_legitimate_address_p
308 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
309 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
311 #undef TARGET_ADDR_SPACE_SUBSET_P
312 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
314 #undef TARGET_ADDR_SPACE_CONVERT
315 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
317 #undef TARGET_INIT_BUILTINS
318 #define TARGET_INIT_BUILTINS spu_init_builtins
319 #undef TARGET_BUILTIN_DECL
320 #define TARGET_BUILTIN_DECL spu_builtin_decl
322 #undef TARGET_EXPAND_BUILTIN
323 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
325 #undef TARGET_UNWIND_WORD_MODE
326 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
328 #undef TARGET_LEGITIMIZE_ADDRESS
329 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
331 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
332 and .quad for the debugger. When it is known that the assembler is fixed,
333 these can be removed. */
334 #undef TARGET_ASM_UNALIGNED_SI_OP
335 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
337 #undef TARGET_ASM_ALIGNED_DI_OP
338 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
340 /* The .8byte directive doesn't seem to work well for a 32 bit architecture. */
342 #undef TARGET_ASM_UNALIGNED_DI_OP
343 #define TARGET_ASM_UNALIGNED_DI_OP NULL
345 #undef TARGET_RTX_COSTS
346 #define TARGET_RTX_COSTS spu_rtx_costs
348 #undef TARGET_ADDRESS_COST
349 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
351 #undef TARGET_SCHED_ISSUE_RATE
352 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
354 #undef TARGET_SCHED_INIT_GLOBAL
355 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
357 #undef TARGET_SCHED_INIT
358 #define TARGET_SCHED_INIT spu_sched_init
360 #undef TARGET_SCHED_VARIABLE_ISSUE
361 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
363 #undef TARGET_SCHED_REORDER
364 #define TARGET_SCHED_REORDER spu_sched_reorder
366 #undef TARGET_SCHED_REORDER2
367 #define TARGET_SCHED_REORDER2 spu_sched_reorder
369 #undef TARGET_SCHED_ADJUST_COST
370 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
375 #undef TARGET_ASM_INTEGER
376 #define TARGET_ASM_INTEGER spu_assemble_integer
378 #undef TARGET_SCALAR_MODE_SUPPORTED_P
379 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
381 #undef TARGET_VECTOR_MODE_SUPPORTED_P
382 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
384 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
385 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
387 #undef TARGET_ASM_GLOBALIZE_LABEL
388 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
390 #undef TARGET_PASS_BY_REFERENCE
391 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
393 #undef TARGET_MUST_PASS_IN_STACK
394 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
396 #undef TARGET_BUILD_BUILTIN_VA_LIST
397 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
399 #undef TARGET_EXPAND_BUILTIN_VA_START
400 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
402 #undef TARGET_SETUP_INCOMING_VARARGS
403 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
405 #undef TARGET_MACHINE_DEPENDENT_REORG
406 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
408 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
409 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
411 #undef TARGET_DEFAULT_TARGET_FLAGS
412 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
414 #undef TARGET_INIT_LIBFUNCS
415 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
417 #undef TARGET_RETURN_IN_MEMORY
418 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
420 #undef TARGET_ENCODE_SECTION_INFO
421 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
423 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
424 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
426 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
427 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
429 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
430 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
432 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
433 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
435 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
436 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
438 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
439 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
441 #undef TARGET_LIBGCC_CMP_RETURN_MODE
442 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
444 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
445 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
447 #undef TARGET_SCHED_SMS_RES_MII
448 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
450 #undef TARGET_ASM_FILE_START
451 #define TARGET_ASM_FILE_START asm_file_start
453 #undef TARGET_SECTION_TYPE_FLAGS
454 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
456 #undef TARGET_ASM_SELECT_SECTION
457 #define TARGET_ASM_SELECT_SECTION spu_select_section
459 #undef TARGET_ASM_UNIQUE_SECTION
460 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
462 #undef TARGET_LEGITIMATE_ADDRESS_P
463 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
465 #undef TARGET_TRAMPOLINE_INIT
466 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
468 struct gcc_target targetm = TARGET_INITIALIZER;
471 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
473 /* Override some of the default param values. With so many registers
474 larger values are better for these params. */
475 MAX_PENDING_LIST_LENGTH = 128;
477 /* With so many registers this is better on by default. */
478 flag_rename_registers = 1;
481 /* Sometimes certain combinations of command options do not make sense
482 on a particular target machine. You can define a macro
483 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
484 executed once just after all the command options have been parsed. */
486 spu_override_options (void)
488 /* Small loops will be unpeeled at -O3. For SPU it is more important
489 to keep code small by default. */
490 if (!flag_unroll_loops && !flag_peel_loops
491 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
492 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
494 flag_omit_frame_pointer = 1;
496 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
497 if (align_functions < 8)
500 spu_hint_dist = 8*4 - spu_max_nops*4;
501 if (spu_hint_dist < 0)
504 if (spu_fixed_range_string)
505 fix_range (spu_fixed_range_string);
507 /* Determine processor architectural level. */
510 if (strcmp (&spu_arch_string[0], "cell") == 0)
511 spu_arch = PROCESSOR_CELL;
512 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
513 spu_arch = PROCESSOR_CELLEDP;
515 error ("Unknown architecture '%s'", &spu_arch_string[0]);
518 /* Determine processor to tune for. */
521 if (strcmp (&spu_tune_string[0], "cell") == 0)
522 spu_tune = PROCESSOR_CELL;
523 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
524 spu_tune = PROCESSOR_CELLEDP;
526 error ("Unknown architecture '%s'", &spu_tune_string[0]);
529 /* Change defaults according to the processor architecture. */
530 if (spu_arch == PROCESSOR_CELLEDP)
532 /* If no command line option has been otherwise specified, change
533 the default to -mno-safe-hints on celledp -- only the original
534 Cell/B.E. processors require this workaround. */
535 if (!(target_flags_explicit & MASK_SAFE_HINTS))
536 target_flags &= ~MASK_SAFE_HINTS;
539 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
542 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
543 struct attribute_spec.handler. */
545 /* True if MODE is valid for the target. By "valid", we mean able to
546 be manipulated in non-trivial ways. In particular, this means all
547 the arithmetic is supported. */
549 spu_scalar_mode_supported_p (enum machine_mode mode)
567 /* Similarly for vector modes. "Supported" here is less strict. At
568 least some operations are supported; need to check optabs or builtins
569 for further details. */
571 spu_vector_mode_supported_p (enum machine_mode mode)
588 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
589 least significant bytes of the outer mode. This function returns
590 TRUE for the SUBREGs where this is correct. */
592 valid_subreg (rtx op)
594 enum machine_mode om = GET_MODE (op);
595 enum machine_mode im = GET_MODE (SUBREG_REG (op));
596 return om != VOIDmode && im != VOIDmode
597 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
598 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
599 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
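/* For example, (subreg:SI (reg:QI ...) 0) is fine because both modes fit in
   the 4-byte preferred slot, but (subreg:TI (reg:SI ...) 0) is rejected: the
   SImode value does not live in the least significant bytes of a TImode
   register on SPU.  */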
602 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
603 and adjust the start offset. */
605 adjust_operand (rtx op, HOST_WIDE_INT * start)
607 enum machine_mode mode;
609 /* Strip any paradoxical SUBREG. */
610 if (GET_CODE (op) == SUBREG
611 && (GET_MODE_BITSIZE (GET_MODE (op))
612 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
616 GET_MODE_BITSIZE (GET_MODE (op)) -
617 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
618 op = SUBREG_REG (op);
620 /* If it is smaller than SI, assure a SUBREG */
621 op_size = GET_MODE_BITSIZE (GET_MODE (op));
625 *start += 32 - op_size;
628 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
629 mode = mode_for_size (op_size, MODE_INT, 0);
630 if (mode != GET_MODE (op))
631 op = gen_rtx_SUBREG (mode, op, 0);
636 spu_expand_extv (rtx ops[], int unsignedp)
638 rtx dst = ops[0], src = ops[1];
639 HOST_WIDE_INT width = INTVAL (ops[2]);
640 HOST_WIDE_INT start = INTVAL (ops[3]);
641 HOST_WIDE_INT align_mask;
642 rtx s0, s1, mask, r0;
644 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
648 /* First, determine if we need 1 TImode load or 2. We need only 1
649 if the bits being extracted do not cross the alignment boundary
650 as determined by the MEM and its address. */
652 align_mask = -MEM_ALIGN (src);
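/* MEM_ALIGN is in bits, so for a 128-bit aligned MEM align_mask is -128 and
   the test below asks whether bit START and bit START+WIDTH-1 fall within
   the same 16-byte quadword.  */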
653 if ((start & align_mask) == ((start + width - 1) & align_mask))
655 /* Alignment is sufficient for 1 load. */
656 s0 = gen_reg_rtx (TImode);
657 r0 = spu_expand_load (s0, 0, src, start / 8);
660 emit_insn (gen_rotqby_ti (s0, s0, r0));
665 s0 = gen_reg_rtx (TImode);
666 s1 = gen_reg_rtx (TImode);
667 r0 = spu_expand_load (s0, s1, src, start / 8);
670 gcc_assert (start + width <= 128);
673 rtx r1 = gen_reg_rtx (SImode);
674 mask = gen_reg_rtx (TImode);
675 emit_move_insn (mask, GEN_INT (-1));
676 emit_insn (gen_rotqby_ti (s0, s0, r0));
677 emit_insn (gen_rotqby_ti (s1, s1, r0));
678 if (GET_CODE (r0) == CONST_INT)
679 r1 = GEN_INT (INTVAL (r0) & 15);
681 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
682 emit_insn (gen_shlqby_ti (mask, mask, r1));
683 emit_insn (gen_selb (s0, s1, s0, mask));
688 else if (GET_CODE (src) == SUBREG)
690 rtx r = SUBREG_REG (src);
691 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
692 s0 = gen_reg_rtx (TImode);
693 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
694 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
696 emit_move_insn (s0, src);
700 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
701 s0 = gen_reg_rtx (TImode);
702 emit_move_insn (s0, src);
705 /* Now s0 is TImode and contains the bits to extract at start. */
708 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
712 tree c = build_int_cst (NULL_TREE, 128 - width);
713 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
716 emit_move_insn (dst, s0);
720 spu_expand_insv (rtx ops[])
722 HOST_WIDE_INT width = INTVAL (ops[1]);
723 HOST_WIDE_INT start = INTVAL (ops[2]);
724 HOST_WIDE_INT maskbits;
725 enum machine_mode dst_mode, src_mode;
726 rtx dst = ops[0], src = ops[3];
727 int dst_size, src_size;
733 if (GET_CODE (ops[0]) == MEM)
734 dst = gen_reg_rtx (TImode);
736 dst = adjust_operand (dst, &start);
737 dst_mode = GET_MODE (dst);
738 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
740 if (CONSTANT_P (src))
742 enum machine_mode m =
743 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
744 src = force_reg (m, convert_to_mode (m, src, 0));
746 src = adjust_operand (src, 0);
747 src_mode = GET_MODE (src);
748 src_size = GET_MODE_BITSIZE (GET_MODE (src));
750 mask = gen_reg_rtx (dst_mode);
751 shift_reg = gen_reg_rtx (dst_mode);
752 shift = dst_size - start - width;
754 /* It's not safe to use subreg here because the compiler assumes
755 that the SUBREG_REG is right justified in the SUBREG. */
756 convert_move (shift_reg, src, 1);
763 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
766 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
769 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
781 maskbits = (-1ll << (32 - width - start));
783 maskbits += (1ll << (32 - start));
784 emit_move_insn (mask, GEN_INT (maskbits));
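/* Worked example: for start 8 and width 8 this computes
   (-1 << 16) + (1 << 24) == 0x00ff0000, i.e. exactly bits 8..15 counted
   from the most significant end of the SImode destination.  */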
787 maskbits = (-1ll << (64 - width - start));
789 maskbits += (1ll << (64 - start));
790 emit_move_insn (mask, GEN_INT (maskbits));
794 unsigned char arr[16];
796 memset (arr, 0, sizeof (arr));
797 arr[i] = 0xff >> (start & 7);
798 for (i++; i <= (start + width - 1) / 8; i++)
800 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
801 emit_move_insn (mask, array_to_constant (TImode, arr));
807 if (GET_CODE (ops[0]) == MEM)
809 rtx low = gen_reg_rtx (SImode);
810 rtx rotl = gen_reg_rtx (SImode);
811 rtx mask0 = gen_reg_rtx (TImode);
817 addr = force_reg (Pmode, XEXP (ops[0], 0));
818 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
819 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
820 emit_insn (gen_negsi2 (rotl, low));
821 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
822 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
823 mem = change_address (ops[0], TImode, addr0);
824 set_mem_alias_set (mem, 0);
825 emit_move_insn (dst, mem);
826 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
827 if (start + width > MEM_ALIGN (ops[0]))
829 rtx shl = gen_reg_rtx (SImode);
830 rtx mask1 = gen_reg_rtx (TImode);
831 rtx dst1 = gen_reg_rtx (TImode);
833 addr1 = plus_constant (addr, 16);
834 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
835 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
836 emit_insn (gen_shlqby_ti (mask1, mask, shl));
837 mem1 = change_address (ops[0], TImode, addr1);
838 set_mem_alias_set (mem1, 0);
839 emit_move_insn (dst1, mem1);
840 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
841 emit_move_insn (mem1, dst1);
843 emit_move_insn (mem, dst);
846 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
851 spu_expand_block_move (rtx ops[])
853 HOST_WIDE_INT bytes, align, offset;
854 rtx src, dst, sreg, dreg, target;
856 if (GET_CODE (ops[2]) != CONST_INT
857 || GET_CODE (ops[3]) != CONST_INT
858 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
861 bytes = INTVAL (ops[2]);
862 align = INTVAL (ops[3]);
872 for (offset = 0; offset + 16 <= bytes; offset += 16)
874 dst = adjust_address (ops[0], V16QImode, offset);
875 src = adjust_address (ops[1], V16QImode, offset);
876 emit_move_insn (dst, src);
881 unsigned char arr[16] = { 0 };
882 for (i = 0; i < bytes - offset; i++)
884 dst = adjust_address (ops[0], V16QImode, offset);
885 src = adjust_address (ops[1], V16QImode, offset);
886 mask = gen_reg_rtx (V16QImode);
887 sreg = gen_reg_rtx (V16QImode);
888 dreg = gen_reg_rtx (V16QImode);
889 target = gen_reg_rtx (V16QImode);
890 emit_move_insn (mask, array_to_constant (V16QImode, arr));
891 emit_move_insn (dreg, dst);
892 emit_move_insn (sreg, src);
893 emit_insn (gen_selb (target, dreg, sreg, mask));
894 emit_move_insn (dst, target);
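/* For instance, a 20-byte copy handled here becomes one V16QImode move for
   the first 16 bytes followed by a selb merge of the remaining 4 bytes into
   the destination quadword, which is then stored back whole.  */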
902 { SPU_EQ, SPU_GT, SPU_GTU };
904 int spu_comp_icode[12][3] = {
905 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
906 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
907 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
908 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
909 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
910 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
911 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
912 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
913 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
914 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
915 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
916 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
919 /* Generate a compare for CODE. Return a brand-new rtx that represents
920 the result of the compare. GCC can figure this out too if we don't
921 provide all variations of compares, but since GCC always wants to use
922 WORD_MODE, we can generate better code in most cases if we do it ourselves. */
925 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
927 int reverse_compare = 0;
928 int reverse_test = 0;
929 rtx compare_result, eq_result;
930 rtx comp_rtx, eq_rtx;
931 enum machine_mode comp_mode;
932 enum machine_mode op_mode;
933 enum spu_comp_code scode, eq_code;
934 enum insn_code ior_code;
935 enum rtx_code code = GET_CODE (cmp);
936 rtx op0 = XEXP (cmp, 0);
937 rtx op1 = XEXP (cmp, 1);
941 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
942 and so on, to keep the constant in operand 1. */
943 if (GET_CODE (op1) == CONST_INT)
945 HOST_WIDE_INT val = INTVAL (op1) - 1;
946 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
971 op_mode = GET_MODE (op0);
977 if (HONOR_NANS (op_mode))
992 if (HONOR_NANS (op_mode))
1001 reverse_compare = 0;
1006 reverse_compare = 1;
1011 reverse_compare = 1;
1016 reverse_compare = 0;
1021 reverse_compare = 1;
1026 reverse_compare = 0;
1072 comp_mode = op_mode;
1076 comp_mode = op_mode;
1080 comp_mode = op_mode;
1084 comp_mode = V4SImode;
1088 comp_mode = V2DImode;
1095 if (GET_MODE (op1) == DFmode
1096 && (scode != SPU_GT && scode != SPU_EQ))
1099 if (is_set == 0 && op1 == const0_rtx
1100 && (GET_MODE (op0) == SImode
1101 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1103 /* Don't need to set a register with the result when we are
1104 comparing against zero and branching. */
1105 reverse_test = !reverse_test;
1106 compare_result = op0;
1110 compare_result = gen_reg_rtx (comp_mode);
1112 if (reverse_compare)
1119 if (spu_comp_icode[index][scode] == 0)
1122 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1124 op0 = force_reg (op_mode, op0);
1125 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1127 op1 = force_reg (op_mode, op1);
1128 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1132 emit_insn (comp_rtx);
1136 eq_result = gen_reg_rtx (comp_mode);
1137 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1142 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1143 gcc_assert (ior_code != CODE_FOR_nothing);
1144 emit_insn (GEN_FCN (ior_code)
1145 (compare_result, compare_result, eq_result));
1154 /* We don't have branch on QI compare insns, so we convert the
1155 QI compare result to a HI result. */
1156 if (comp_mode == QImode)
1158 rtx old_res = compare_result;
1159 compare_result = gen_reg_rtx (HImode);
1161 emit_insn (gen_extendqihi2 (compare_result, old_res));
1165 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1167 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1169 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1170 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1171 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1174 else if (is_set == 2)
1176 rtx target = operands[0];
1177 int compare_size = GET_MODE_BITSIZE (comp_mode);
1178 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1179 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1181 rtx op_t = operands[2];
1182 rtx op_f = operands[3];
1184 /* The result of the comparison can be SI, HI or QI mode. Create a
1185 mask based on that result. */
1186 if (target_size > compare_size)
1188 select_mask = gen_reg_rtx (mode);
1189 emit_insn (gen_extend_compare (select_mask, compare_result));
1191 else if (target_size < compare_size)
1193 gen_rtx_SUBREG (mode, compare_result,
1194 (compare_size - target_size) / BITS_PER_UNIT);
1195 else if (comp_mode != mode)
1196 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1198 select_mask = compare_result;
1200 if (GET_MODE (target) != GET_MODE (op_t)
1201 || GET_MODE (target) != GET_MODE (op_f))
1205 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1207 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1211 rtx target = operands[0];
1213 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1214 gen_rtx_NOT (comp_mode, compare_result)));
1215 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1216 emit_insn (gen_extendhisi2 (target, compare_result));
1217 else if (GET_MODE (target) == SImode
1218 && GET_MODE (compare_result) == QImode)
1219 emit_insn (gen_extend_compare (target, compare_result));
1221 emit_move_insn (target, compare_result);
1226 const_double_to_hwint (rtx x)
1230 if (GET_MODE (x) == SFmode)
1232 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1233 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1235 else if (GET_MODE (x) == DFmode)
1238 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1239 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1241 val = (val << 32) | (l[1] & 0xffffffff);
1249 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1253 gcc_assert (mode == SFmode || mode == DFmode);
1256 tv[0] = (v << 32) >> 32;
1257 else if (mode == DFmode)
1259 tv[1] = (v << 32) >> 32;
1262 real_from_target (&rv, tv, mode);
1263 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
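/* E.g. hwint_to_const_double (SFmode, 0x3f800000) rebuilds the CONST_DOUBLE
   for 1.0f from its IEEE-754 bit pattern; const_double_to_hwint above is the
   inverse mapping.  */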
1267 print_operand_address (FILE * file, register rtx addr)
1272 if (GET_CODE (addr) == AND
1273 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1274 && INTVAL (XEXP (addr, 1)) == -16)
1275 addr = XEXP (addr, 0);
1277 switch (GET_CODE (addr))
1280 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1284 reg = XEXP (addr, 0);
1285 offset = XEXP (addr, 1);
1286 if (GET_CODE (offset) == REG)
1288 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1289 reg_names[REGNO (offset)]);
1291 else if (GET_CODE (offset) == CONST_INT)
1293 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1294 INTVAL (offset), reg_names[REGNO (reg)]);
1304 output_addr_const (file, addr);
1314 print_operand (FILE * file, rtx x, int code)
1316 enum machine_mode mode = GET_MODE (x);
1318 unsigned char arr[16];
1319 int xcode = GET_CODE (x);
1321 if (GET_MODE (x) == VOIDmode)
1324 case 'L': /* 128 bits, signed */
1325 case 'm': /* 128 bits, signed */
1326 case 'T': /* 128 bits, signed */
1327 case 't': /* 128 bits, signed */
1330 case 'K': /* 64 bits, signed */
1331 case 'k': /* 64 bits, signed */
1332 case 'D': /* 64 bits, signed */
1333 case 'd': /* 64 bits, signed */
1336 case 'J': /* 32 bits, signed */
1337 case 'j': /* 32 bits, signed */
1338 case 's': /* 32 bits, signed */
1339 case 'S': /* 32 bits, signed */
1346 case 'j': /* 32 bits, signed */
1347 case 'k': /* 64 bits, signed */
1348 case 'm': /* 128 bits, signed */
1349 if (xcode == CONST_INT
1350 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1352 gcc_assert (logical_immediate_p (x, mode));
1353 constant_to_array (mode, x, arr);
1354 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1355 val = trunc_int_for_mode (val, SImode);
1356 switch (which_logical_immediate (val))
1361 fprintf (file, "h");
1364 fprintf (file, "b");
1374 case 'J': /* 32 bits, signed */
1375 case 'K': /* 64 bits, signed */
1376 case 'L': /* 128 bits, signed */
1377 if (xcode == CONST_INT
1378 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1380 gcc_assert (logical_immediate_p (x, mode)
1381 || iohl_immediate_p (x, mode));
1382 constant_to_array (mode, x, arr);
1383 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1384 val = trunc_int_for_mode (val, SImode);
1385 switch (which_logical_immediate (val))
1391 val = trunc_int_for_mode (val, HImode);
1394 val = trunc_int_for_mode (val, QImode);
1399 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1405 case 't': /* 128 bits, signed */
1406 case 'd': /* 64 bits, signed */
1407 case 's': /* 32 bits, signed */
1410 enum immediate_class c = classify_immediate (x, mode);
1414 constant_to_array (mode, x, arr);
1415 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1416 val = trunc_int_for_mode (val, SImode);
1417 switch (which_immediate_load (val))
1422 fprintf (file, "a");
1425 fprintf (file, "h");
1428 fprintf (file, "hu");
1435 constant_to_array (mode, x, arr);
1436 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1438 fprintf (file, "b");
1440 fprintf (file, "h");
1442 fprintf (file, "w");
1444 fprintf (file, "d");
1447 if (xcode == CONST_VECTOR)
1449 x = CONST_VECTOR_ELT (x, 0);
1450 xcode = GET_CODE (x);
1452 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1453 fprintf (file, "a");
1454 else if (xcode == HIGH)
1455 fprintf (file, "hu");
1469 case 'T': /* 128 bits, signed */
1470 case 'D': /* 64 bits, signed */
1471 case 'S': /* 32 bits, signed */
1474 enum immediate_class c = classify_immediate (x, mode);
1478 constant_to_array (mode, x, arr);
1479 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1480 val = trunc_int_for_mode (val, SImode);
1481 switch (which_immediate_load (val))
1488 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1493 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1496 constant_to_array (mode, x, arr);
1498 for (i = 0; i < 16; i++)
1503 print_operand (file, GEN_INT (val), 0);
1506 constant_to_array (mode, x, arr);
1507 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1508 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1513 if (GET_CODE (x) == CONST_VECTOR)
1514 x = CONST_VECTOR_ELT (x, 0);
1515 output_addr_const (file, x);
1517 fprintf (file, "@h");
1531 if (xcode == CONST_INT)
1533 /* Only the 4 least significant bits are relevant for the
1534 generate-controls (c*d) instructions. */
1535 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1540 case 'M': /* print code for c*d */
1541 if (GET_CODE (x) == CONST_INT)
1545 fprintf (file, "b");
1548 fprintf (file, "h");
1551 fprintf (file, "w");
1554 fprintf (file, "d");
1563 case 'N': /* Negate the operand */
1564 if (xcode == CONST_INT)
1565 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1566 else if (xcode == CONST_VECTOR)
1567 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1568 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1571 case 'I': /* enable/disable interrupts */
1572 if (xcode == CONST_INT)
1573 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1576 case 'b': /* branch modifiers */
1578 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1579 else if (COMPARISON_P (x))
1580 fprintf (file, "%s", xcode == NE ? "n" : "");
1583 case 'i': /* indirect call */
1586 if (GET_CODE (XEXP (x, 0)) == REG)
1587 /* Used in indirect function calls. */
1588 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1590 output_address (XEXP (x, 0));
1594 case 'p': /* load/store */
1598 xcode = GET_CODE (x);
1603 xcode = GET_CODE (x);
1606 fprintf (file, "d");
1607 else if (xcode == CONST_INT)
1608 fprintf (file, "a");
1609 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1610 fprintf (file, "r");
1611 else if (xcode == PLUS || xcode == LO_SUM)
1613 if (GET_CODE (XEXP (x, 1)) == REG)
1614 fprintf (file, "x");
1616 fprintf (file, "d");
1621 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1623 output_addr_const (file, GEN_INT (val));
1627 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1629 output_addr_const (file, GEN_INT (val));
1633 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1635 output_addr_const (file, GEN_INT (val));
1639 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1640 val = (val >> 3) & 0x1f;
1641 output_addr_const (file, GEN_INT (val));
1645 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1648 output_addr_const (file, GEN_INT (val));
1652 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1655 output_addr_const (file, GEN_INT (val));
1659 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1662 output_addr_const (file, GEN_INT (val));
1666 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1667 val = -(val & -8ll);
1668 val = (val >> 3) & 0x1f;
1669 output_addr_const (file, GEN_INT (val));
1674 constant_to_array (mode, x, arr);
1675 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1676 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
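/* The expression above recovers the unbiased exponent of the leading SFmode
   element: e.g. for 1.0f (0x3f800000) arr[0] is 0x3f, arr[1] is 0x80, so
   val is ((0x7e + 1) & 0xff) - 127 == 0.  */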
1681 fprintf (file, "%s", reg_names[REGNO (x)]);
1682 else if (xcode == MEM)
1683 output_address (XEXP (x, 0));
1684 else if (xcode == CONST_VECTOR)
1685 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1687 output_addr_const (file, x);
1694 output_operand_lossage ("invalid %%xn code");
1699 extern char call_used_regs[];
1701 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1702 caller saved register. For leaf functions it is more efficient to
1703 use a volatile register because we won't need to save and restore the
1704 pic register. This routine is only valid after register allocation
1705 is completed, so we can pick an unused register. */
1709 rtx pic_reg = pic_offset_table_rtx;
1710 if (!reload_completed && !reload_in_progress)
1712 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1713 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1717 /* Split constant addresses to handle cases that are too large.
1718 Add in the pic register when in PIC mode.
1719 Split immediates that require more than 1 instruction. */
1721 spu_split_immediate (rtx * ops)
1723 enum machine_mode mode = GET_MODE (ops[0]);
1724 enum immediate_class c = classify_immediate (ops[1], mode);
1730 unsigned char arrhi[16];
1731 unsigned char arrlo[16];
1732 rtx to, temp, hi, lo;
1734 enum machine_mode imode = mode;
1735 /* We need to do reals as ints because the constant used in the
1736 IOR might not be a legitimate real constant. */
1737 imode = int_mode_for_mode (mode);
1738 constant_to_array (mode, ops[1], arrhi);
1740 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1743 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1744 for (i = 0; i < 16; i += 4)
1746 arrlo[i + 2] = arrhi[i + 2];
1747 arrlo[i + 3] = arrhi[i + 3];
1748 arrlo[i + 0] = arrlo[i + 1] = 0;
1749 arrhi[i + 2] = arrhi[i + 3] = 0;
1751 hi = array_to_constant (imode, arrhi);
1752 lo = array_to_constant (imode, arrlo);
1753 emit_move_insn (temp, hi);
1754 emit_insn (gen_rtx_SET
1755 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
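/* As a sketch, for an SImode word whose value is 0x12345678 the two halves
   emitted above end up as something like
       ilhu  $to,0x1234
       iohl  $to,0x5678
   where $to only stands in for whatever register TO is allocated.  */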
1760 unsigned char arr_fsmbi[16];
1761 unsigned char arr_andbi[16];
1762 rtx to, reg_fsmbi, reg_and;
1764 enum machine_mode imode = mode;
1765 /* We need to do reals as ints because the constant used in the
1766 * AND might not be a legitimate real constant. */
1767 imode = int_mode_for_mode (mode);
1768 constant_to_array (mode, ops[1], arr_fsmbi);
1770 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1773 for (i = 0; i < 16; i++)
1774 if (arr_fsmbi[i] != 0)
1776 arr_andbi[0] = arr_fsmbi[i];
1777 arr_fsmbi[i] = 0xff;
1779 for (i = 1; i < 16; i++)
1780 arr_andbi[i] = arr_andbi[0];
1781 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1782 reg_and = array_to_constant (imode, arr_andbi);
1783 emit_move_insn (to, reg_fsmbi);
1784 emit_insn (gen_rtx_SET
1785 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1789 if (reload_in_progress || reload_completed)
1791 rtx mem = force_const_mem (mode, ops[1]);
1792 if (TARGET_LARGE_MEM)
1794 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1795 emit_move_insn (addr, XEXP (mem, 0));
1796 mem = replace_equiv_address (mem, addr);
1798 emit_move_insn (ops[0], mem);
1804 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1808 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1809 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1812 emit_insn (gen_pic (ops[0], ops[1]));
1815 rtx pic_reg = get_pic_reg ();
1816 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1817 crtl->uses_pic_offset_table = 1;
1819 return flag_pic || c == IC_IL2s;
1830 /* SAVING is TRUE when we are generating the actual load and store
1831 instructions for REGNO. When determining the size of the stack
1832 needed for saving registers we must allocate enough space for the
1833 worst case, because we don't always have the information early enough
1834 to avoid allocating it. But we can at least eliminate the actual loads
1835 and stores during the prologue/epilogue. */
1837 need_to_save_reg (int regno, int saving)
1839 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1842 && regno == PIC_OFFSET_TABLE_REGNUM
1843 && (!saving || crtl->uses_pic_offset_table)
1845 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1850 /* This function is only correct starting with local register allocation. */
1853 spu_saved_regs_size (void)
1855 int reg_save_size = 0;
1858 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1859 if (need_to_save_reg (regno, 0))
1860 reg_save_size += 0x10;
1861 return reg_save_size;
1865 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1867 rtx reg = gen_rtx_REG (V4SImode, regno);
1869 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1870 return emit_insn (gen_movv4si (mem, reg));
1874 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1876 rtx reg = gen_rtx_REG (V4SImode, regno);
1878 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1879 return emit_insn (gen_movv4si (reg, mem));
1882 /* This happens after reload, so we need to expand it. */
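/* Informally: when IMM satisfies constraint K (roughly the signed 10-bit
   immediate accepted by "ai") a single add is emitted; otherwise IMM is
   first loaded into SCRATCH, so e.g. an adjustment of -2000 becomes roughly
       il  $scratch,-2000
       a   $dst,$src,$scratch
   with the register names purely illustrative.  */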
1884 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1887 if (satisfies_constraint_K (GEN_INT (imm)))
1889 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1893 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1894 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1895 if (REGNO (src) == REGNO (scratch))
1901 /* Return nonzero if this function is known to have a null epilogue. */
1904 direct_return (void)
1906 if (reload_completed)
1908 if (cfun->static_chain_decl == 0
1909 && (spu_saved_regs_size ()
1911 + crtl->outgoing_args_size
1912 + crtl->args.pretend_args_size == 0)
1913 && current_function_is_leaf)
1920    The stack frame looks like this:
1924    AP ->   +-------------+
1927  prev SP   | back chain  |
1930            | reg save    | crtl->args.pretend_args_size bytes
1933            | saved regs  | spu_saved_regs_size() bytes
1934    FP ->   +-------------+
1936            | vars        | get_frame_size() bytes
1937   HFP ->   +-------------+
1940            | args        | crtl->outgoing_args_size bytes
1946    SP ->   +-------------+
1950 spu_expand_prologue (void)
1952 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1953 HOST_WIDE_INT total_size;
1954 HOST_WIDE_INT saved_regs_size;
1955 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1956 rtx scratch_reg_0, scratch_reg_1;
1959 if (flag_pic && optimize == 0)
1960 crtl->uses_pic_offset_table = 1;
1962 if (spu_naked_function_p (current_function_decl))
1965 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1966 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1968 saved_regs_size = spu_saved_regs_size ();
1969 total_size = size + saved_regs_size
1970 + crtl->outgoing_args_size
1971 + crtl->args.pretend_args_size;
1973 if (!current_function_is_leaf
1974 || cfun->calls_alloca || total_size > 0)
1975 total_size += STACK_POINTER_OFFSET;
1977 /* Save this first because code after this might use the link
1978 register as a scratch register. */
1979 if (!current_function_is_leaf)
1981 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1982 RTX_FRAME_RELATED_P (insn) = 1;
1987 offset = -crtl->args.pretend_args_size;
1988 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1989 if (need_to_save_reg (regno, 1))
1992 insn = frame_emit_store (regno, sp_reg, offset);
1993 RTX_FRAME_RELATED_P (insn) = 1;
1997 if (flag_pic && crtl->uses_pic_offset_table)
1999 rtx pic_reg = get_pic_reg ();
2000 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2001 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2006 if (flag_stack_check)
2008 /* We compare against total_size-1 because
2009 ($sp >= total_size) <=> ($sp > total_size-1) */
2010 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2011 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2012 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2013 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2015 emit_move_insn (scratch_v4si, size_v4si);
2016 size_v4si = scratch_v4si;
2018 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2019 emit_insn (gen_vec_extractv4si
2020 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2021 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2024 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2025 the value of the previous $sp because we save it as the back
2027 if (total_size <= 2000)
2029 /* In this case we save the back chain first. */
2030 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2032 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2036 insn = emit_move_insn (scratch_reg_0, sp_reg);
2038 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2040 RTX_FRAME_RELATED_P (insn) = 1;
2041 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2042 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2044 if (total_size > 2000)
2046 /* Save the back chain ptr */
2047 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2050 if (frame_pointer_needed)
2052 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2053 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2054 + crtl->outgoing_args_size;
2055 /* Set the new frame_pointer */
2056 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2057 RTX_FRAME_RELATED_P (insn) = 1;
2058 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2059 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2060 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2067 spu_expand_epilogue (bool sibcall_p)
2069 int size = get_frame_size (), offset, regno;
2070 HOST_WIDE_INT saved_regs_size, total_size;
2071 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2072 rtx jump, scratch_reg_0;
2074 if (spu_naked_function_p (current_function_decl))
2077 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2079 saved_regs_size = spu_saved_regs_size ();
2080 total_size = size + saved_regs_size
2081 + crtl->outgoing_args_size
2082 + crtl->args.pretend_args_size;
2084 if (!current_function_is_leaf
2085 || cfun->calls_alloca || total_size > 0)
2086 total_size += STACK_POINTER_OFFSET;
2090 if (cfun->calls_alloca)
2091 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2093 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2096 if (saved_regs_size > 0)
2098 offset = -crtl->args.pretend_args_size;
2099 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2100 if (need_to_save_reg (regno, 1))
2103 frame_emit_load (regno, sp_reg, offset);
2108 if (!current_function_is_leaf)
2109 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2113 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2114 jump = emit_jump_insn (gen__return ());
2115 emit_barrier_after (jump);
2121 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2125 /* This is inefficient because it ends up copying to a save-register
2126 which then gets saved even though $lr has already been saved. But
2127 it does generate better code for leaf functions and we don't need
2128 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2129 used for __builtin_return_address anyway, so maybe we don't care if
2130 it's inefficient. */
2131 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2135 /* Given VAL, generate a constant appropriate for MODE.
2136 If MODE is a vector mode, every element will be VAL.
2137 For TImode, VAL will be zero extended to 128 bits. */
2139 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2145 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2146 || GET_MODE_CLASS (mode) == MODE_FLOAT
2147 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2148 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2150 if (GET_MODE_CLASS (mode) == MODE_INT)
2151 return immed_double_const (val, 0, mode);
2153 /* val is the bit representation of the float */
2154 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2155 return hwint_to_const_double (mode, val);
2157 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2158 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2160 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2162 units = GET_MODE_NUNITS (mode);
2164 v = rtvec_alloc (units);
2166 for (i = 0; i < units; ++i)
2167 RTVEC_ELT (v, i) = inner;
2169 return gen_rtx_CONST_VECTOR (mode, v);
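/* So spu_const (V4SImode, 1) yields (const_vector:V4SI [1 1 1 1]) while
   spu_const (SImode, 1) is just (const_int 1).  */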
2172 /* Create a MODE vector constant from 4 ints. */
2174 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2176 unsigned char arr[16];
2177 arr[0] = (a >> 24) & 0xff;
2178 arr[1] = (a >> 16) & 0xff;
2179 arr[2] = (a >> 8) & 0xff;
2180 arr[3] = (a >> 0) & 0xff;
2181 arr[4] = (b >> 24) & 0xff;
2182 arr[5] = (b >> 16) & 0xff;
2183 arr[6] = (b >> 8) & 0xff;
2184 arr[7] = (b >> 0) & 0xff;
2185 arr[8] = (c >> 24) & 0xff;
2186 arr[9] = (c >> 16) & 0xff;
2187 arr[10] = (c >> 8) & 0xff;
2188 arr[11] = (c >> 0) & 0xff;
2189 arr[12] = (d >> 24) & 0xff;
2190 arr[13] = (d >> 16) & 0xff;
2191 arr[14] = (d >> 8) & 0xff;
2192 arr[15] = (d >> 0) & 0xff;
2193 return array_to_constant(mode, arr);
2196 /* branch hint stuff */
2198 /* An array of these is used to propagate hints to predecessor blocks. */
2201 rtx prop_jump; /* propagated from another block */
2202 int bb_index; /* the original block. */
2204 static struct spu_bb_info *spu_bb_info;
2206 #define STOP_HINT_P(INSN) \
2207 (GET_CODE(INSN) == CALL_INSN \
2208 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2209 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2211 /* 1 when RTX is a hinted branch or its target. We keep track of
2212 what has been hinted so the safe-hint code can test it easily. */
2213 #define HINTED_P(RTX) \
2214 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2216 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2217 #define SCHED_ON_EVEN_P(RTX) \
2218 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2220 /* Emit a nop for INSN such that the two will dual issue. This assumes
2221 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2222 We check for TImode to handle a MULTI1 insn which has dual issued its
2223 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2226 emit_nop_for_insn (rtx insn)
2230 p = get_pipe (insn);
2231 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2232 new_insn = emit_insn_after (gen_lnop (), insn);
2233 else if (p == 1 && GET_MODE (insn) == TImode)
2235 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2236 PUT_MODE (new_insn, TImode);
2237 PUT_MODE (insn, VOIDmode);
2240 new_insn = emit_insn_after (gen_lnop (), insn);
2241 recog_memoized (new_insn);
2244 /* Insert nops in basic blocks to meet dual issue alignment
2245 requirements. Also make sure hbrp and hint instructions are at least
2246 one cycle apart, possibly inserting a nop. */
2250 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2254 /* This sets up INSN_ADDRESSES. */
2255 shorten_branches (get_insns ());
2257 /* Keep track of length added by nops. */
2261 insn = get_insns ();
2262 if (!active_insn_p (insn))
2263 insn = next_active_insn (insn);
2264 for (; insn; insn = next_insn)
2266 next_insn = next_active_insn (insn);
2267 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2268 || INSN_CODE (insn) == CODE_FOR_hbr)
2272 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2273 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2274 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2277 prev_insn = emit_insn_before (gen_lnop (), insn);
2278 PUT_MODE (prev_insn, GET_MODE (insn));
2279 PUT_MODE (insn, TImode);
2285 if (INSN_CODE (insn) == CODE_FOR_blockage)
2287 if (GET_MODE (insn) == TImode)
2288 PUT_MODE (next_insn, TImode);
2290 next_insn = next_active_insn (insn);
2292 addr = INSN_ADDRESSES (INSN_UID (insn));
2293 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2295 if (((addr + length) & 7) != 0)
2297 emit_nop_for_insn (prev_insn);
2301 else if (GET_MODE (insn) == TImode
2302 && ((next_insn && GET_MODE (next_insn) != TImode)
2303 || get_attr_type (insn) == TYPE_MULTI0)
2304 && ((addr + length) & 7) != 0)
2306 /* prev_insn will always be set because the first insn is
2307 always 8-byte aligned. */
2308 emit_nop_for_insn (prev_insn);
2316 /* Routines for branch hints. */
2319 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2320 int distance, sbitmap blocks)
2322 rtx branch_label = 0;
2327 if (before == 0 || branch == 0 || target == 0)
2330 /* While scheduling we require hints to be no further than 600, so
2331 we need to enforce that here too */
2335 /* If we have a Basic block note, emit it after the basic block note. */
2336 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2337 before = NEXT_INSN (before);
2339 branch_label = gen_label_rtx ();
2340 LABEL_NUSES (branch_label)++;
2341 LABEL_PRESERVE_P (branch_label) = 1;
2342 insn = emit_label_before (branch_label, branch);
2343 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2344 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2346 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2347 recog_memoized (hint);
2348 HINTED_P (branch) = 1;
2350 if (GET_CODE (target) == LABEL_REF)
2351 HINTED_P (XEXP (target, 0)) = 1;
2352 else if (tablejump_p (branch, 0, &table))
2356 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2357 vec = XVEC (PATTERN (table), 0);
2359 vec = XVEC (PATTERN (table), 1);
2360 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2361 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2364 if (distance >= 588)
2366 /* Make sure the hint isn't scheduled any earlier than this point,
2367 which could make it too far for the branch offset to fit */
2368 recog_memoized (emit_insn_before (gen_blockage (), hint));
2370 else if (distance <= 8 * 4)
2372 /* To guarantee at least 8 insns between the hint and branch we
2375 for (d = distance; d < 8 * 4; d += 4)
2378 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2379 recog_memoized (insn);
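/* E.g. a branch only 16 bytes (4 insns) past its hint gets 4 nops from the
   loop above, restoring the required 8-insn separation.  */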
2382 /* Make sure any nops inserted aren't scheduled before the hint. */
2383 recog_memoized (emit_insn_after (gen_blockage (), hint));
2385 /* Make sure any nops inserted aren't scheduled after the call. */
2386 if (CALL_P (branch) && distance < 8 * 4)
2387 recog_memoized (emit_insn_before (gen_blockage (), branch));
2391 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2392 the rtx for the branch target. */
2394 get_branch_target (rtx branch)
2396 if (GET_CODE (branch) == JUMP_INSN)
2400 /* Return statements */
2401 if (GET_CODE (PATTERN (branch)) == RETURN)
2402 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2405 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2406 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2410 if (extract_asm_operands (PATTERN (branch)) != NULL)
2413 set = single_set (branch);
2414 src = SET_SRC (set);
2415 if (GET_CODE (SET_DEST (set)) != PC)
2418 if (GET_CODE (src) == IF_THEN_ELSE)
2421 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2424 /* If the more probable case is not a fall through, then
2425 try a branch hint. */
2426 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2427 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2428 && GET_CODE (XEXP (src, 1)) != PC)
2429 lab = XEXP (src, 1);
2430 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2431 && GET_CODE (XEXP (src, 2)) != PC)
2432 lab = XEXP (src, 2);
2436 if (GET_CODE (lab) == RETURN)
2437 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2445 else if (GET_CODE (branch) == CALL_INSN)
2448 /* All of our call patterns are in a PARALLEL and the CALL is
2449 the first pattern in the PARALLEL. */
2450 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2452 call = XVECEXP (PATTERN (branch), 0, 0);
2453 if (GET_CODE (call) == SET)
2454 call = SET_SRC (call);
2455 if (GET_CODE (call) != CALL)
2457 return XEXP (XEXP (call, 0), 0);
2462 /* The special $hbr register is used to prevent the insn scheduler from
2463 moving hbr insns across instructions which invalidate them. It
2464 should only be used in a clobber, and this function searches for
2465 insns which clobber it. */
2467 insn_clobbers_hbr (rtx insn)
2470 && GET_CODE (PATTERN (insn)) == PARALLEL)
2472 rtx parallel = PATTERN (insn);
2475 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2477 clobber = XVECEXP (parallel, 0, j);
2478 if (GET_CODE (clobber) == CLOBBER
2479 && GET_CODE (XEXP (clobber, 0)) == REG
2480 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2487 /* Search up to 32 insns starting at FIRST:
2488 - at any kind of hinted branch, just return
2489 - at any unconditional branch in the first 15 insns, just return
2490 - at a call or indirect branch, after the first 15 insns, force it to
2491 an even address and return
2492 - at any unconditional branch, after the first 15 insns, force it to
2494 At the end of the search, insert an hbrp within 4 insns of FIRST,
2495 and an hbrp within 16 instructions of FIRST.
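/* In byte terms (each insn is 4 bytes): "within 4 insns" is the first 16
   bytes after FIRST and "within 16 insns" is the first 64 bytes.  The
   before_4/before_16 insns found in the loop below mark those boundaries,
   and 14*4 rather than 16*4 is used because the first hbrp itself can add
   up to 2 instructions.  */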
2498 insert_hbrp_for_ilb_runout (rtx first)
2500 rtx insn, before_4 = 0, before_16 = 0;
2501 int addr = 0, length, first_addr = -1;
2502 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2503 int insert_lnop_after = 0;
2504 for (insn = first; insn; insn = NEXT_INSN (insn))
2507 if (first_addr == -1)
2508 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2509 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2510 length = get_attr_length (insn);
2512 if (before_4 == 0 && addr + length >= 4 * 4)
2514 /* We test for 14 instructions because the first hbrp will add
2515 up to 2 instructions. */
2516 if (before_16 == 0 && addr + length >= 14 * 4)
2519 if (INSN_CODE (insn) == CODE_FOR_hbr)
2521 /* Make sure an hbrp is at least 2 cycles away from a hint.
2522 Insert an lnop after the hbrp when necessary. */
2523 if (before_4 == 0 && addr > 0)
2526 insert_lnop_after |= 1;
2528 else if (before_4 && addr <= 4 * 4)
2529 insert_lnop_after |= 1;
2530 if (before_16 == 0 && addr > 10 * 4)
2533 insert_lnop_after |= 2;
2535 else if (before_16 && addr <= 14 * 4)
2536 insert_lnop_after |= 2;
2539 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2541 if (addr < hbrp_addr0)
2543 else if (addr < hbrp_addr1)
2547 if (CALL_P (insn) || JUMP_P (insn))
2549 if (HINTED_P (insn))
2552 /* Any branch after the first 15 insns should be on an even
2553 address to avoid a special case branch. There might be
2554 some nops and/or hbrps inserted, so we test after 10
2557 SCHED_ON_EVEN_P (insn) = 1;
2560 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2564 if (addr + length >= 32 * 4)
2566 gcc_assert (before_4 && before_16);
2567 if (hbrp_addr0 > 4 * 4)
2570 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2571 recog_memoized (insn);
2572 INSN_ADDRESSES_NEW (insn,
2573 INSN_ADDRESSES (INSN_UID (before_4)));
2574 PUT_MODE (insn, GET_MODE (before_4));
2575 PUT_MODE (before_4, TImode);
2576 if (insert_lnop_after & 1)
2578 insn = emit_insn_before (gen_lnop (), before_4);
2579 recog_memoized (insn);
2580 INSN_ADDRESSES_NEW (insn,
2581 INSN_ADDRESSES (INSN_UID (before_4)));
2582 PUT_MODE (insn, TImode);
2585 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2586 && hbrp_addr1 > 16 * 4)
2589 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2590 recog_memoized (insn);
2591 INSN_ADDRESSES_NEW (insn,
2592 INSN_ADDRESSES (INSN_UID (before_16)));
2593 PUT_MODE (insn, GET_MODE (before_16));
2594 PUT_MODE (before_16, TImode);
2595 if (insert_lnop_after & 2)
2597 insn = emit_insn_before (gen_lnop (), before_16);
2598 recog_memoized (insn);
2599 INSN_ADDRESSES_NEW (insn,
2600 INSN_ADDRESSES (INSN_UID
2602 PUT_MODE (insn, TImode);
2608 else if (BARRIER_P (insn))
2613 /* The SPU might hang when it executes 48 inline instructions after a
2614 hinted branch jumps to its hinted target. The beginning of a
2615 function and the return from a call might have been hinted, and must
2616 be handled as well. To prevent a hang we insert 2 hbrps. The first
2617 should be within 6 insns of the branch target. The second should be
2618 within 22 insns of the branch target. When determining if hbrps are
2619 necessary, we look for only 32 inline instructions, because up to
2620 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2621 new hbrps, we insert them within 4 and 16 insns of the target. */
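/* Rough arithmetic behind these numbers: 32 inline insns plus up to 12
   nops and 4 hbrps is 48, the hang window mentioned above.  Inserting
   within 4 and 16 insns of the target leaves room for the hbrps (and any
   lnops they need) while still landing within the 6- and 22-insn limits.  */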
2626 if (TARGET_SAFE_HINTS)
2628 shorten_branches (get_insns ());
2629 /* Insert hbrp at beginning of function */
2630 insn = next_active_insn (get_insns ());
2632 insert_hbrp_for_ilb_runout (insn);
2633 /* Insert hbrp after hinted targets. */
2634 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2635 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2636 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2640 static int in_spu_reorg;
2642 /* Insert branch hints. There are no branch optimizations after this
2643 pass, so it's safe to set our branch hints now. */
2645 spu_machine_dependent_reorg (void)
2650 rtx branch_target = 0;
2651 int branch_addr = 0, insn_addr, required_dist = 0;
2655 if (!TARGET_BRANCH_HINTS || optimize == 0)
2657 /* We still do it for unoptimized code because an external
2658 function might have hinted a call or return. */
2664 blocks = sbitmap_alloc (last_basic_block);
2665 sbitmap_zero (blocks);
2668 compute_bb_for_insn ();
2673 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2674 sizeof (struct spu_bb_info));
2676 /* We need exact insn addresses and lengths. */
2677 shorten_branches (get_insns ());
2679 for (i = n_basic_blocks - 1; i >= 0; i--)
2681 bb = BASIC_BLOCK (i);
2683 if (spu_bb_info[i].prop_jump)
2685 branch = spu_bb_info[i].prop_jump;
2686 branch_target = get_branch_target (branch);
2687 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2688 required_dist = spu_hint_dist;
2690 /* Search from end of a block to beginning. In this loop, find
2691 jumps which need a branch hint and emit the hint only when:
2692 - it's an indirect branch and we're at the insn which sets
2694 - we're at an insn that will invalidate the hint. e.g., a
2695 call, another hint insn, inline asm that clobbers $hbr, and
2696 some inlined operations (divmodsi4). Don't consider jumps
2697 because they are only at the end of a block and are
2698 considered when we are deciding whether to propagate
2699 - we're getting too far away from the branch. The hbr insns
2700 only have a signed 10 bit offset
2701 We go back as far as possible so the branch will be considered
2702 for propagation when we get to the beginning of the block. */
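/* The 600-byte limit used below (and in spu_emit_branch_hint above) is a
   conservative bound.  The hbr offset field is only 10 signed bits, which
   covers on the order of 2KB of code if counted in instruction words, but
   later passes can insert nops and hbrps that push the hint further away
   from its branch.  */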
2703 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2707 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2709 && ((GET_CODE (branch_target) == REG
2710 && set_of (branch_target, insn) != NULL_RTX)
2711 || insn_clobbers_hbr (insn)
2712 || branch_addr - insn_addr > 600))
2714 rtx next = NEXT_INSN (insn);
2715 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2716 if (insn != BB_END (bb)
2717 && branch_addr - next_addr >= required_dist)
2721 "hint for %i in block %i before %i\n",
2722 INSN_UID (branch), bb->index,
2724 spu_emit_branch_hint (next, branch, branch_target,
2725 branch_addr - next_addr, blocks);
2730 /* JUMP_P will only be true at the end of a block. When
2731 branch is already set it means we've previously decided
2732 to propagate a hint for that branch into this block. */
2733 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2736 if ((branch_target = get_branch_target (insn)))
2739 branch_addr = insn_addr;
2740 required_dist = spu_hint_dist;
2744 if (insn == BB_HEAD (bb))
2750 /* If we haven't emitted a hint for this branch yet, it might
2751 be profitable to emit it in one of the predecessor blocks,
2752 especially for loops. */
2754 basic_block prev = 0, prop = 0, prev2 = 0;
2755 int loop_exit = 0, simple_loop = 0;
2756 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2758 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2759 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2760 prev = EDGE_PRED (bb, j)->src;
2762 prev2 = EDGE_PRED (bb, j)->src;
2764 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2765 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2767 else if (EDGE_SUCC (bb, j)->dest == bb)
2770 /* If this branch is a loop exit then propagate to previous
2771 fallthru block. This catches the cases when it is a simple
2772 loop or when there is an initial branch into the loop. */
2773 if (prev && (loop_exit || simple_loop)
2774 && prev->loop_depth <= bb->loop_depth)
2777 /* If there is only one adjacent predecessor. Don't propagate
2778 outside this loop. This loop_depth test isn't perfect, but
2779 I'm not sure the loop_father member is valid at this point. */
2780 else if (prev && single_pred_p (bb)
2781 && prev->loop_depth == bb->loop_depth)
2784 /* If this is the JOIN block of a simple IF-THEN then
2785 propagate the hint to the HEADER block. */
2786 else if (prev && prev2
2787 && EDGE_COUNT (bb->preds) == 2
2788 && EDGE_COUNT (prev->preds) == 1
2789 && EDGE_PRED (prev, 0)->src == prev2
2790 && prev2->loop_depth == bb->loop_depth
2791 && GET_CODE (branch_target) != REG)
2794 /* Don't propagate when:
2795 - this is a simple loop and the hint would be too far
2796 - this is not a simple loop and there are 16 insns in
2798 - the predecessor block ends in a branch that will be
2800 - the predecessor block ends in an insn that invalidates
2804 && (bbend = BB_END (prop))
2805 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2806 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2807 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2810 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2811 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2812 bb->index, prop->index, bb->loop_depth,
2813 INSN_UID (branch), loop_exit, simple_loop,
2814 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2816 spu_bb_info[prop->index].prop_jump = branch;
2817 spu_bb_info[prop->index].bb_index = i;
2819 else if (branch_addr - next_addr >= required_dist)
2822 fprintf (dump_file, "hint for %i in block %i before %i\n",
2823 INSN_UID (branch), bb->index,
2824 INSN_UID (NEXT_INSN (insn)));
2825 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2826 branch_addr - next_addr, blocks);
2833 if (!sbitmap_empty_p (blocks))
2834 find_many_sub_basic_blocks (blocks);
2836 /* We have to schedule to make sure alignment is ok. */
2837 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2839 /* The hints need to be scheduled, so call it again. */
2846 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2847 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2849 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2850 between its branch label and the branch. We don't move the
2851 label because GCC expects it at the beginning of the block. */
2852 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2853 rtx label_ref = XVECEXP (unspec, 0, 0);
2854 rtx label = XEXP (label_ref, 0);
2857 for (branch = NEXT_INSN (label);
2858 !JUMP_P (branch) && !CALL_P (branch);
2859 branch = NEXT_INSN (branch))
2860 if (NONJUMP_INSN_P (branch))
2861 offset += get_attr_length (branch);
2863 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2866 if (spu_flag_var_tracking)
2869 timevar_push (TV_VAR_TRACKING);
2870 variable_tracking_main ();
2871 timevar_pop (TV_VAR_TRACKING);
2872 df_finish_pass (false);
2875 free_bb_for_insn ();
2881 /* Insn scheduling routines, primarily for dual issue. */
2883 spu_sched_issue_rate (void)
2889 uses_ls_unit(rtx insn)
2891 rtx set = single_set (insn);
2893 && (GET_CODE (SET_DEST (set)) == MEM
2894 || GET_CODE (SET_SRC (set)) == MEM))
2903 /* Handle inline asm */
2904 if (INSN_CODE (insn) == -1)
2906 t = get_attr_type (insn);
2931 case TYPE_IPREFETCH:
2939 /* haifa-sched.c has a static variable that keeps track of the current
2940 cycle. It is passed to spu_sched_reorder, and we record it here for
2941 use by spu_sched_variable_issue. It won't be accurate if the
2942 scheduler updates its clock_var between the two calls. */
2943 static int clock_var;
2945 /* This is used to keep track of insn alignment. Set to 0 at the
2946 beginning of each block and increased by the "length" attr of each
2948 static int spu_sched_length;
2950 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2951 ready list appropriately in spu_sched_reorder(). */
2952 static int pipe0_clock;
2953 static int pipe1_clock;
2955 static int prev_clock_var;
2957 static int prev_priority;
2959 /* The SPU needs to load the next ilb sometime during the execution of
2960 the previous ilb. There is a potential conflict if every cycle has a
2961 load or store. To avoid the conflict we make sure the load/store
2962 unit is free for at least one cycle during the execution of insns in
2963 the previous ilb. */
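/* For example: if the last 15 instruction slots (4 * 15 bytes of code
   since spu_ls_first) have all issued loads or stores and another
   load/store is about to be scheduled, spu_sched_reorder below emits an
   iprefetch (hbrp) so the load/store unit gets at least one free cycle
   for the instruction-line fetch.  */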
2964 static int spu_ls_first;
2965 static int prev_ls_clock;
2968 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2969 int max_ready ATTRIBUTE_UNUSED)
2971 spu_sched_length = 0;
2975 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2976 int max_ready ATTRIBUTE_UNUSED)
2978 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2980 /* When any block might be at least 8-byte aligned, assume they
2981 will all be at least 8-byte aligned to make sure dual issue
2982 works out correctly. */
2983 spu_sched_length = 0;
2985 spu_ls_first = INT_MAX;
2990 prev_clock_var = -1;
2995 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2996 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3000 if (GET_CODE (PATTERN (insn)) == USE
3001 || GET_CODE (PATTERN (insn)) == CLOBBER
3002 || (len = get_attr_length (insn)) == 0)
3005 spu_sched_length += len;
3007 /* Reset on inline asm */
3008 if (INSN_CODE (insn) == -1)
3010 spu_ls_first = INT_MAX;
3015 p = get_pipe (insn);
3017 pipe0_clock = clock_var;
3019 pipe1_clock = clock_var;
3023 if (clock_var - prev_ls_clock > 1
3024 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3025 spu_ls_first = INT_MAX;
3026 if (uses_ls_unit (insn))
3028 if (spu_ls_first == INT_MAX)
3029 spu_ls_first = spu_sched_length;
3030 prev_ls_clock = clock_var;
3033 /* The scheduler hasn't inserted the nop, but we will later on.
3034 Include those nops in spu_sched_length. */
3035 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3036 spu_sched_length += 4;
3037 prev_clock_var = clock_var;
3039 /* more is -1 when called from spu_sched_reorder for new insns
3040 that don't have INSN_PRIORITY */
3042 prev_priority = INSN_PRIORITY (insn);
3045 /* Always try issuing more insns. spu_sched_reorder will decide
3046 when the cycle should be advanced. */
3050 /* This function is called for both TARGET_SCHED_REORDER and
3051 TARGET_SCHED_REORDER2. */
3053 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3054 rtx *ready, int *nreadyp, int clock)
3056 int i, nready = *nreadyp;
3057 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3062 if (nready <= 0 || pipe1_clock >= clock)
3065 /* Find any rtl insns that don't generate assembly insns and schedule
3067 for (i = nready - 1; i >= 0; i--)
3070 if (INSN_CODE (insn) == -1
3071 || INSN_CODE (insn) == CODE_FOR_blockage
3072 || (INSN_P (insn) && get_attr_length (insn) == 0))
3074 ready[i] = ready[nready - 1];
3075 ready[nready - 1] = insn;
3080 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3081 for (i = 0; i < nready; i++)
3082 if (INSN_CODE (ready[i]) != -1)
3085 switch (get_attr_type (insn))
3110 case TYPE_IPREFETCH:
3116 /* In the first scheduling phase, schedule loads and stores together
3117 to increase the chance they will get merged during postreload CSE. */
3118 if (!reload_completed && pipe_ls >= 0)
3120 insn = ready[pipe_ls];
3121 ready[pipe_ls] = ready[nready - 1];
3122 ready[nready - 1] = insn;
3126 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3130 /* When we have loads/stores in every cycle of the last 15 insns and
3131 we are about to schedule another load/store, emit an hbrp insn
3134 && spu_sched_length - spu_ls_first >= 4 * 15
3135 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3137 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3138 recog_memoized (insn);
3139 if (pipe0_clock < clock)
3140 PUT_MODE (insn, TImode);
3141 spu_sched_variable_issue (file, verbose, insn, -1);
3145 /* In general, we want to emit nops to increase dual issue, but dual
3146 issue isn't faster when one of the insns could be scheduled later
3147 without affecting the critical path. We look at INSN_PRIORITY to
3148 make a good guess, but it isn't perfect, so -mdual-nops=n can be
3149 used to adjust it. */
3150 if (in_spu_reorg && spu_dual_nops < 10)
3152 /* When we are at an even address and we are not issuing nops to
3153 improve scheduling, we need to advance the cycle. */
3154 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3155 && (spu_dual_nops == 0
3158 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3161 /* When at an odd address, schedule the highest priority insn
3162 without considering pipeline. */
3163 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3164 && (spu_dual_nops == 0
3166 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3171 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3172 pipe0 insn in the ready list, schedule it. */
3173 if (pipe0_clock < clock && pipe_0 >= 0)
3174 schedule_i = pipe_0;
3176 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3177 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3179 schedule_i = pipe_1;
3181 if (schedule_i > -1)
3183 insn = ready[schedule_i];
3184 ready[schedule_i] = ready[nready - 1];
3185 ready[nready - 1] = insn;
3191 /* INSN is dependent on DEP_INSN. */
3193 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3197 /* The blockage pattern is used to prevent instructions from being
3198 moved across it and has no cost. */
3199 if (INSN_CODE (insn) == CODE_FOR_blockage
3200 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3203 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3204 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3207 /* Make sure hbrps are spread out. */
3208 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3209 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3212 /* Make sure hints and hbrps are 2 cycles apart. */
3213 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3214 || INSN_CODE (insn) == CODE_FOR_hbr)
3215 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3216 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3219 /* An hbrp has no real dependency on other insns. */
3220 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3221 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3224 /* Assuming that it is unlikely an argument register will be used in
3225 the first cycle of the called function, we reduce the cost for
3226 slightly better scheduling of dep_insn. When not hinted, the
3227 mispredicted branch would hide the cost as well. */
3230 rtx target = get_branch_target (insn);
3231 if (GET_CODE (target) != REG || !set_of (target, insn))
3236 /* And when returning from a function, let's assume the return values
3237 are completed sooner too. */
3238 if (CALL_P (dep_insn))
3241 /* Make sure an instruction that loads from the back chain is scheduled
3242 away from the return instruction so a hint is more likely to get
3244 if (INSN_CODE (insn) == CODE_FOR__return
3245 && (set = single_set (dep_insn))
3246 && GET_CODE (SET_DEST (set)) == REG
3247 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3250 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3251 scheduler makes every insn in a block anti-dependent on the final
3252 jump_insn. We adjust here so higher cost insns will get scheduled
3254 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3255 return insn_cost (dep_insn) - 3;
3260 /* Create a CONST_DOUBLE from a string. */
3262 spu_float_const (const char *string, enum machine_mode mode)
3264 REAL_VALUE_TYPE value;
3265 value = REAL_VALUE_ATOF (string, mode);
3266 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3270 spu_constant_address_p (rtx x)
3272 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3273 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3274 || GET_CODE (x) == HIGH);
3277 static enum spu_immediate
3278 which_immediate_load (HOST_WIDE_INT val)
3280 gcc_assert (val == trunc_int_for_mode (val, SImode));
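  /* The ranges below presumably correspond to the immediate-load forms:
     il for 16-bit signed values (e.g. 0x7fff or -1), ila for 18-bit
     unsigned values (e.g. 0x2ffff), ilh when both halfwords of the word
     are equal (e.g. 0x12341234), and ilhu when the low halfword is zero
     (e.g. 0x56780000).  */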
3282 if (val >= -0x8000 && val <= 0x7fff)
3284 if (val >= 0 && val <= 0x3ffff)
3286 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3288 if ((val & 0xffff) == 0)
3294 /* Return true when OP can be loaded by one of the il instructions, or
3295 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3297 immediate_load_p (rtx op, enum machine_mode mode)
3299 if (CONSTANT_P (op))
3301 enum immediate_class c = classify_immediate (op, mode);
3302 return c == IC_IL1 || c == IC_IL1s
3303 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3308 /* Return true if the first SIZE bytes of arr is a constant that can be
3309 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3310 represent the size and offset of the instruction to use. */
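/* For illustration, assuming the usual shuffle-control encoding: a
   quadword equal to the identity pattern 0x10,0x11,...,0x1f except for
   one naturally aligned run 0x00,0x01,0x02,0x03 is what cwd generates,
   so cpat_info would report run = 4 with *pstart set to that run's byte
   offset (chd and cbd use shorter runs, cdd a run of 8).  */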
3312 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3314 int cpat, run, i, start;
3318 for (i = 0; i < size && cpat; i++)
3326 else if (arr[i] == 2 && arr[i+1] == 3)
3328 else if (arr[i] == 0)
3330 while (arr[i+run] == run && i+run < 16)
3332 if (run != 4 && run != 8)
3337 if ((i & (run-1)) != 0)
3344 if (cpat && (run || size < 16))
3351 *pstart = start == -1 ? 16-run : start;
3357 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3358 it into a register. MODE is only valid when OP is a CONST_INT. */
3359 static enum immediate_class
3360 classify_immediate (rtx op, enum machine_mode mode)
3363 unsigned char arr[16];
3364 int i, j, repeated, fsmbi, repeat;
3366 gcc_assert (CONSTANT_P (op));
3368 if (GET_MODE (op) != VOIDmode)
3369 mode = GET_MODE (op);
3371 /* A V4SI const_vector with all identical symbols is ok. */
3374 && GET_CODE (op) == CONST_VECTOR
3375 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3376 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3377 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3378 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3379 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3380 op = CONST_VECTOR_ELT (op, 0);
3382 switch (GET_CODE (op))
3386 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3389 /* We can never know if the resulting address fits in 18 bits and can be
3390 loaded with ila. For now, assume the address will not overflow if
3391 the displacement is "small" (fits 'K' constraint). */
3392 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3394 rtx sym = XEXP (XEXP (op, 0), 0);
3395 rtx cst = XEXP (XEXP (op, 0), 1);
3397 if (GET_CODE (sym) == SYMBOL_REF
3398 && GET_CODE (cst) == CONST_INT
3399 && satisfies_constraint_K (cst))
3408 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3409 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3410 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3416 constant_to_array (mode, op, arr);
3418 /* Check that each 4-byte slot is identical. */
3420 for (i = 4; i < 16; i += 4)
3421 for (j = 0; j < 4; j++)
3422 if (arr[j] != arr[i + j])
3427 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3428 val = trunc_int_for_mode (val, SImode);
3430 if (which_immediate_load (val) != SPU_NONE)
3434 /* Any mode of 2 bytes or smaller can be loaded with an il
3436 gcc_assert (GET_MODE_SIZE (mode) > 2);
3440 for (i = 0; i < 16 && fsmbi; i++)
3441 if (arr[i] != 0 && repeat == 0)
3443 else if (arr[i] != 0 && arr[i] != repeat)
3446 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3448 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3461 static enum spu_immediate
3462 which_logical_immediate (HOST_WIDE_INT val)
3464 gcc_assert (val == trunc_int_for_mode (val, SImode));
3466 if (val >= -0x200 && val <= 0x1ff)
3468 if (val >= 0 && val <= 0xffff)
3470 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3472 val = trunc_int_for_mode (val, HImode);
3473 if (val >= -0x200 && val <= 0x1ff)
3475 if ((val & 0xff) == ((val >> 8) & 0xff))
3477 val = trunc_int_for_mode (val, QImode);
3478 if (val >= -0x200 && val <= 0x1ff)
3485 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3488 const_vector_immediate_p (rtx x)
3491 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3492 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3493 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3494 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3500 logical_immediate_p (rtx op, enum machine_mode mode)
3503 unsigned char arr[16];
3506 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3507 || GET_CODE (op) == CONST_VECTOR);
3509 if (GET_CODE (op) == CONST_VECTOR
3510 && !const_vector_immediate_p (op))
3513 if (GET_MODE (op) != VOIDmode)
3514 mode = GET_MODE (op);
3516 constant_to_array (mode, op, arr);
3518 /* Check that bytes are repeated. */
3519 for (i = 4; i < 16; i += 4)
3520 for (j = 0; j < 4; j++)
3521 if (arr[j] != arr[i + j])
3524 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3525 val = trunc_int_for_mode (val, SImode);
3527 i = which_logical_immediate (val);
3528 return i != SPU_NONE && i != SPU_IOHL;
3532 iohl_immediate_p (rtx op, enum machine_mode mode)
3535 unsigned char arr[16];
3538 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3539 || GET_CODE (op) == CONST_VECTOR);
3541 if (GET_CODE (op) == CONST_VECTOR
3542 && !const_vector_immediate_p (op))
3545 if (GET_MODE (op) != VOIDmode)
3546 mode = GET_MODE (op);
3548 constant_to_array (mode, op, arr);
3550 /* Check that bytes are repeated. */
3551 for (i = 4; i < 16; i += 4)
3552 for (j = 0; j < 4; j++)
3553 if (arr[j] != arr[i + j])
3556 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3557 val = trunc_int_for_mode (val, SImode);
3559 return val >= 0 && val <= 0xffff;
3563 arith_immediate_p (rtx op, enum machine_mode mode,
3564 HOST_WIDE_INT low, HOST_WIDE_INT high)
3567 unsigned char arr[16];
3570 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3571 || GET_CODE (op) == CONST_VECTOR);
3573 if (GET_CODE (op) == CONST_VECTOR
3574 && !const_vector_immediate_p (op))
3577 if (GET_MODE (op) != VOIDmode)
3578 mode = GET_MODE (op);
3580 constant_to_array (mode, op, arr);
3582 if (VECTOR_MODE_P (mode))
3583 mode = GET_MODE_INNER (mode);
3585 bytes = GET_MODE_SIZE (mode);
3586 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3588 /* Check that bytes are repeated. */
3589 for (i = bytes; i < 16; i += bytes)
3590 for (j = 0; j < bytes; j++)
3591 if (arr[j] != arr[i + j])
3595 for (j = 1; j < bytes; j++)
3596 val = (val << 8) | arr[j];
3598 val = trunc_int_for_mode (val, mode);
3600 return val >= low && val <= high;
3603 /* TRUE when op is an immediate and an exact power of 2, and given that
3604 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3605 all entries must be the same. */
3607 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3609 enum machine_mode int_mode;
3611 unsigned char arr[16];
3614 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3615 || GET_CODE (op) == CONST_VECTOR);
3617 if (GET_CODE (op) == CONST_VECTOR
3618 && !const_vector_immediate_p (op))
3621 if (GET_MODE (op) != VOIDmode)
3622 mode = GET_MODE (op);
3624 constant_to_array (mode, op, arr);
3626 if (VECTOR_MODE_P (mode))
3627 mode = GET_MODE_INNER (mode);
3629 bytes = GET_MODE_SIZE (mode);
3630 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3632 /* Check that bytes are repeated. */
3633 for (i = bytes; i < 16; i += bytes)
3634 for (j = 0; j < bytes; j++)
3635 if (arr[j] != arr[i + j])
3639 for (j = 1; j < bytes; j++)
3640 val = (val << 8) | arr[j];
3642 val = trunc_int_for_mode (val, int_mode);
3644 /* Currently, we only handle SFmode */
3645 gcc_assert (mode == SFmode);
3648 int exp = (val >> 23) - 127;
3649 return val > 0 && (val & 0x007fffff) == 0
3650 && exp >= low && exp <= high;
3655 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3658 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3663 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3665 rtx plus = XEXP (x, 0);
3666 rtx op0 = XEXP (plus, 0);
3667 rtx op1 = XEXP (plus, 1);
3668 if (GET_CODE (op1) == CONST_INT)
3672 return (GET_CODE (x) == SYMBOL_REF
3673 && (decl = SYMBOL_REF_DECL (x)) != 0
3674 && TREE_CODE (decl) == VAR_DECL
3675 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3679 - any 32-bit constant (SImode, SFmode)
3680 - any constant that can be generated with fsmbi (any mode)
3681 - a 64-bit constant where the high and low 32-bit halves are identical
3683 - a 128-bit constant where the four 32-bit words match. */
3685 spu_legitimate_constant_p (rtx x)
3687 if (GET_CODE (x) == HIGH)
3690 /* Reject any __ea qualified reference. These can't appear in
3691 instructions but must be forced to the constant pool. */
3692 if (for_each_rtx (&x, ea_symbol_ref, 0))
3695 /* V4SI with all identical symbols is valid. */
3697 && GET_MODE (x) == V4SImode
3698 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3699 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3700 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3701 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3702 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3703 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3705 if (GET_CODE (x) == CONST_VECTOR
3706 && !const_vector_immediate_p (x))
3711 /* Valid addresses are:
3712 - symbol_ref, label_ref, const
3714 - reg + const_int, where const_int is 16 byte aligned
3715 - reg + reg, alignment doesn't matter
3716 The alignment matters in the reg+const case because lqd and stqd
3717 ignore the 4 least significant bits of the const. We only care about
3718 16 byte modes because the expand phase will change all smaller MEM
3719 references to TImode. */
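/* Examples of what the checks below accept: for a TImode access,
   "reg + 32" is a valid lqd/stqd address while "reg + 20" is not (the
   offset is not a multiple of 16), and "reg + reg" is always fine (lqx).
   For an SImode access "reg + 20" is accepted here, because the access
   will be expanded as a TImode load plus rotate anyway.  */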
3721 spu_legitimate_address_p (enum machine_mode mode,
3722 rtx x, bool reg_ok_strict)
3724 int aligned = GET_MODE_SIZE (mode) >= 16;
3726 && GET_CODE (x) == AND
3727 && GET_CODE (XEXP (x, 1)) == CONST_INT
3728 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3730 switch (GET_CODE (x))
3733 return !TARGET_LARGE_MEM;
3737 /* Keep __ea references until reload so that spu_expand_mov can see them
3739 if (ea_symbol_ref (&x, 0))
3740 return !reload_in_progress && !reload_completed;
3741 return !TARGET_LARGE_MEM;
3744 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3752 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3757 rtx op0 = XEXP (x, 0);
3758 rtx op1 = XEXP (x, 1);
3759 if (GET_CODE (op0) == SUBREG)
3760 op0 = XEXP (op0, 0);
3761 if (GET_CODE (op1) == SUBREG)
3762 op1 = XEXP (op1, 0);
3763 if (GET_CODE (op0) == REG
3764 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3765 && GET_CODE (op1) == CONST_INT
3766 && INTVAL (op1) >= -0x2000
3767 && INTVAL (op1) <= 0x1fff
3768 && (!aligned || (INTVAL (op1) & 15) == 0))
3770 if (GET_CODE (op0) == REG
3771 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3772 && GET_CODE (op1) == REG
3773 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3784 /* Like spu_legitimate_address_p, except with named addresses. */
3786 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3787 bool reg_ok_strict, addr_space_t as)
3789 if (as == ADDR_SPACE_EA)
3790 return (REG_P (x) && (GET_MODE (x) == EAmode));
3792 else if (as != ADDR_SPACE_GENERIC)
3795 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3798 /* When the address is reg + const_int, force the const_int into a
3801 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3802 enum machine_mode mode ATTRIBUTE_UNUSED)
3805 /* Make sure both operands are registers. */
3806 if (GET_CODE (x) == PLUS)
3810 if (ALIGNED_SYMBOL_REF_P (op0))
3812 op0 = force_reg (Pmode, op0);
3813 mark_reg_pointer (op0, 128);
3815 else if (GET_CODE (op0) != REG)
3816 op0 = force_reg (Pmode, op0);
3817 if (ALIGNED_SYMBOL_REF_P (op1))
3819 op1 = force_reg (Pmode, op1);
3820 mark_reg_pointer (op1, 128);
3822 else if (GET_CODE (op1) != REG)
3823 op1 = force_reg (Pmode, op1);
3824 x = gen_rtx_PLUS (Pmode, op0, op1);
3829 /* Like spu_legitimize_address, except with named address support. */
3831 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3834 if (as != ADDR_SPACE_GENERIC)
3837 return spu_legitimize_address (x, oldx, mode);
3840 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3841 struct attribute_spec.handler. */
3843 spu_handle_fndecl_attribute (tree * node,
3845 tree args ATTRIBUTE_UNUSED,
3846 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3848 if (TREE_CODE (*node) != FUNCTION_DECL)
3850 warning (0, "%qE attribute only applies to functions",
3852 *no_add_attrs = true;
3858 /* Handle the "vector" attribute. */
3860 spu_handle_vector_attribute (tree * node, tree name,
3861 tree args ATTRIBUTE_UNUSED,
3862 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3864 tree type = *node, result = NULL_TREE;
3865 enum machine_mode mode;
3868 while (POINTER_TYPE_P (type)
3869 || TREE_CODE (type) == FUNCTION_TYPE
3870 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3871 type = TREE_TYPE (type);
3873 mode = TYPE_MODE (type);
3875 unsigned_p = TYPE_UNSIGNED (type);
3879 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3882 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3885 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3888 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3891 result = V4SF_type_node;
3894 result = V2DF_type_node;
3900 /* Propagate qualifiers attached to the element type
3901 onto the vector type. */
3902 if (result && result != type && TYPE_QUALS (type))
3903 result = build_qualified_type (result, TYPE_QUALS (type));
3905 *no_add_attrs = true; /* No need to hang on to the attribute. */
3908 warning (0, "%qE attribute ignored", name);
3910 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3915 /* Return nonzero if FUNC is a naked function. */
3917 spu_naked_function_p (tree func)
3921 if (TREE_CODE (func) != FUNCTION_DECL)
3924 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3925 return a != NULL_TREE;
3929 spu_initial_elimination_offset (int from, int to)
3931 int saved_regs_size = spu_saved_regs_size ();
3933 if (!current_function_is_leaf || crtl->outgoing_args_size
3934 || get_frame_size () || saved_regs_size)
3935 sp_offset = STACK_POINTER_OFFSET;
3936 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3937 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3938 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3939 return get_frame_size ();
3940 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3941 return sp_offset + crtl->outgoing_args_size
3942 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3943 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3944 return get_frame_size () + saved_regs_size + sp_offset;
3950 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3952 enum machine_mode mode = TYPE_MODE (type);
3953 int byte_size = ((mode == BLKmode)
3954 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3956 /* Make sure small structs are left justified in a register. */
3957 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3958 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3960 enum machine_mode smode;
3963 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3964 int n = byte_size / UNITS_PER_WORD;
3965 v = rtvec_alloc (nregs);
3966 for (i = 0; i < n; i++)
3968 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3969 gen_rtx_REG (TImode,
3972 GEN_INT (UNITS_PER_WORD * i));
3973 byte_size -= UNITS_PER_WORD;
3981 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3983 gen_rtx_EXPR_LIST (VOIDmode,
3984 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3985 GEN_INT (UNITS_PER_WORD * n));
3987 return gen_rtx_PARALLEL (mode, v);
3989 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3993 spu_function_arg (CUMULATIVE_ARGS cum,
3994 enum machine_mode mode,
3995 tree type, int named ATTRIBUTE_UNUSED)
3999 if (cum >= MAX_REGISTER_ARGS)
4002 byte_size = ((mode == BLKmode)
4003 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4005 /* The ABI does not allow parameters to be passed partially in
4006 reg and partially in stack. */
4007 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4010 /* Make sure small structs are left justified in a register. */
4011 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4012 && byte_size < UNITS_PER_WORD && byte_size > 0)
4014 enum machine_mode smode;
4018 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4019 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4020 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4022 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4025 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4028 /* Variable sized types are passed by reference. */
4030 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4031 enum machine_mode mode ATTRIBUTE_UNUSED,
4032 const_tree type, bool named ATTRIBUTE_UNUSED)
4034 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4040 /* Create and return the va_list datatype.
4042 On SPU, va_list is an array type equivalent to
4044 typedef struct __va_list_tag
4046 void *__args __attribute__((__aligned(16)));
4047 void *__skip __attribute__((__aligned(16)));
4051 where __args points to the arg that will be returned by the next
4052 va_arg(), and __skip points to the previous stack frame such that
4053 when __args == __skip we should advance __args by 32 bytes. */
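/* The 32 bytes skipped presumably match STACK_POINTER_OFFSET (used by
   spu_va_start below): the area at the bottom of the caller's frame
   reserved for the back chain and link register holds no argument data,
   so once __args reaches __skip it must jump over that area to get to
   the stack-passed varargs.  */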
4055 spu_build_builtin_va_list (void)
4057 tree f_args, f_skip, record, type_decl;
4060 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4063 build_decl (BUILTINS_LOCATION,
4064 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4066 f_args = build_decl (BUILTINS_LOCATION,
4067 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4068 f_skip = build_decl (BUILTINS_LOCATION,
4069 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4071 DECL_FIELD_CONTEXT (f_args) = record;
4072 DECL_ALIGN (f_args) = 128;
4073 DECL_USER_ALIGN (f_args) = 1;
4075 DECL_FIELD_CONTEXT (f_skip) = record;
4076 DECL_ALIGN (f_skip) = 128;
4077 DECL_USER_ALIGN (f_skip) = 1;
4079 TREE_CHAIN (record) = type_decl;
4080 TYPE_NAME (record) = type_decl;
4081 TYPE_FIELDS (record) = f_args;
4082 TREE_CHAIN (f_args) = f_skip;
4084 /* We know this is being padded and we want it that way. It is an internal
4085 type so hide the warnings from the user. */
4087 warn_padded = false;
4089 layout_type (record);
4093 /* The correct type is an array type of one element. */
4094 return build_array_type (record, build_index_type (size_zero_node));
4097 /* Implement va_start by filling the va_list structure VALIST.
4098 NEXTARG points to the first anonymous stack argument.
4100 The following global variables are used to initialize
4101 the va_list structure:
4104 the CUMULATIVE_ARGS for this function
4106 crtl->args.arg_offset_rtx:
4107 holds the offset of the first anonymous stack argument
4108 (relative to the virtual arg pointer). */
4111 spu_va_start (tree valist, rtx nextarg)
4113 tree f_args, f_skip;
4116 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4117 f_skip = TREE_CHAIN (f_args);
4119 valist = build_va_arg_indirect_ref (valist);
4121 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4123 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4125 /* Find the __args area. */
4126 t = make_tree (TREE_TYPE (args), nextarg);
4127 if (crtl->args.pretend_args_size > 0)
4128 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4129 size_int (-STACK_POINTER_OFFSET));
4130 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4131 TREE_SIDE_EFFECTS (t) = 1;
4132 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4134 /* Find the __skip area. */
4135 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4136 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4137 size_int (crtl->args.pretend_args_size
4138 - STACK_POINTER_OFFSET));
4139 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4140 TREE_SIDE_EFFECTS (t) = 1;
4141 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4144 /* Gimplify va_arg by updating the va_list structure
4145 VALIST as required to retrieve an argument of type
4146 TYPE, and returning that argument.
4148 ret = va_arg(VALIST, TYPE);
4150 generates code equivalent to:
4152 paddedsize = (sizeof(TYPE) + 15) & -16;
4153 if (VALIST.__args + paddedsize > VALIST.__skip
4154 && VALIST.__args <= VALIST.__skip)
4155 addr = VALIST.__skip + 32;
4157 addr = VALIST.__args;
4158 VALIST.__args = addr + paddedsize;
4159 ret = *(TYPE *)addr;
4162 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4163 gimple_seq * post_p ATTRIBUTE_UNUSED)
4165 tree f_args, f_skip;
4167 HOST_WIDE_INT size, rsize;
4168 tree paddedsize, addr, tmp;
4169 bool pass_by_reference_p;
4171 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4172 f_skip = TREE_CHAIN (f_args);
4174 valist = build_simple_mem_ref (valist);
4176 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4178 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4180 addr = create_tmp_var (ptr_type_node, "va_arg");
4182 /* if an object is dynamically sized, a pointer to it is passed
4183 instead of the object itself. */
4184 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4186 if (pass_by_reference_p)
4187 type = build_pointer_type (type);
4188 size = int_size_in_bytes (type);
4189 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4191 /* build conditional expression to calculate addr. The expression
4192 will be gimplified later. */
4193 paddedsize = size_int (rsize);
4194 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4195 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4196 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4197 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4198 unshare_expr (skip)));
4200 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4201 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4202 size_int (32)), unshare_expr (args));
4204 gimplify_assign (addr, tmp, pre_p);
4206 /* update VALIST.__args */
4207 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4208 gimplify_assign (unshare_expr (args), tmp, pre_p);
4210 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4213 if (pass_by_reference_p)
4214 addr = build_va_arg_indirect_ref (addr);
4216 return build_va_arg_indirect_ref (addr);
4219 /* Save parameter registers starting with the register that corresponds
4220 to the first unnamed parameters. If the first unnamed parameter is
4221 in the stack then save no registers. Set pretend_args_size to the
4222 amount of space needed to save the registers. */
4224 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4225 tree type, int *pretend_size, int no_rtl)
4234 /* cum currently points to the last named argument; we want to
4235 start at the next argument. */
4236 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4238 offset = -STACK_POINTER_OFFSET;
4239 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4241 tmp = gen_frame_mem (V4SImode,
4242 plus_constant (virtual_incoming_args_rtx,
4244 emit_move_insn (tmp,
4245 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4248 *pretend_size = offset + STACK_POINTER_OFFSET;
4253 spu_conditional_register_usage (void)
4257 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4258 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4262 /* This is called any time we inspect the alignment of a register for
4265 reg_aligned_for_addr (rtx x)
4268 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4269 return REGNO_POINTER_ALIGN (regno) >= 128;
4272 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4273 into its SYMBOL_REF_FLAGS. */
4275 spu_encode_section_info (tree decl, rtx rtl, int first)
4277 default_encode_section_info (decl, rtl, first);
4279 /* If a variable has a forced alignment to < 16 bytes, mark it with
4280 SYMBOL_FLAG_ALIGN1. */
4281 if (TREE_CODE (decl) == VAR_DECL
4282 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4283 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4286 /* Return TRUE if we are certain the mem refers to a complete object
4287 which is both 16-byte aligned and padded to a 16-byte boundary. This
4288 would make it safe to store with a single instruction.
4289 We guarantee the alignment and padding for static objects by aligning
4290 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4291 FIXME: We currently cannot guarantee this for objects on the stack
4292 because assign_parm_setup_stack calls assign_stack_local with the
4293 alignment of the parameter mode and in that case the alignment never
4294 gets adjusted by LOCAL_ALIGNMENT. */
4296 store_with_one_insn_p (rtx mem)
4298 enum machine_mode mode = GET_MODE (mem);
4299 rtx addr = XEXP (mem, 0);
4300 if (mode == BLKmode)
4302 if (GET_MODE_SIZE (mode) >= 16)
4304 /* Only static objects. */
4305 if (GET_CODE (addr) == SYMBOL_REF)
4307 /* We use the associated declaration to make sure the access is
4308 referring to the whole object.
4309 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4310 if it is necessary. Will there be cases where one exists, and
4311 the other does not? Will there be cases where both exist, but
4312 have different types? */
4313 tree decl = MEM_EXPR (mem);
4315 && TREE_CODE (decl) == VAR_DECL
4316 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4318 decl = SYMBOL_REF_DECL (addr);
4320 && TREE_CODE (decl) == VAR_DECL
4321 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4327 /* Return 1 when the address is not valid for a simple load and store as
4328 required by the '_mov*' patterns. We could make this less strict
4329 for loads, but we prefer MEMs to look the same so they are more
4330 likely to be merged. */
4332 address_needs_split (rtx mem)
4334 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4335 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4336 || !(store_with_one_insn_p (mem)
4337 || mem_is_padded_component_ref (mem))))
4343 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4344 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4345 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4347 /* MEM is known to be an __ea qualified memory access. Emit a call to
4348 fetch the ppu memory to local store, and return its address in local
4352 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4356 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4357 if (!cache_fetch_dirty)
4358 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4359 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4360 2, ea_addr, EAmode, ndirty, SImode);
4365 cache_fetch = init_one_libfunc ("__cache_fetch");
4366 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4367 1, ea_addr, EAmode);
4371 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4372 dirty bit marking, inline.
4374 The cache control data structure is an array of
4376 struct __cache_tag_array
4378 unsigned int tag_lo[4];
4379 unsigned int tag_hi[4];
4380 void *data_pointer[4];
4382 vector unsigned short dirty_bits[4];
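/* A sketch of the lookup implemented below, for this 4-way software
   cache with 128-byte lines: "ea & (tag_array_size - 128)" selects the
   set's struct, "ea & -128" is the tag, a vector compare against the
   four stored tags followed by gbb and clz turns the (at most one)
   matching slot into eq_index (16, 20, 24 or 28), and that index rotates
   the matching data_pointer into the preferred slot before the line
   offset "ea & 127" is added.  */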
4386 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4390 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4391 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4392 rtx index_mask = gen_reg_rtx (SImode);
4393 rtx tag_arr = gen_reg_rtx (Pmode);
4394 rtx splat_mask = gen_reg_rtx (TImode);
4395 rtx splat = gen_reg_rtx (V4SImode);
4396 rtx splat_hi = NULL_RTX;
4397 rtx tag_index = gen_reg_rtx (Pmode);
4398 rtx block_off = gen_reg_rtx (SImode);
4399 rtx tag_addr = gen_reg_rtx (Pmode);
4400 rtx tag = gen_reg_rtx (V4SImode);
4401 rtx cache_tag = gen_reg_rtx (V4SImode);
4402 rtx cache_tag_hi = NULL_RTX;
4403 rtx cache_ptrs = gen_reg_rtx (TImode);
4404 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4405 rtx tag_equal = gen_reg_rtx (V4SImode);
4406 rtx tag_equal_hi = NULL_RTX;
4407 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4408 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4409 rtx eq_index = gen_reg_rtx (SImode);
4410 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4412 if (spu_ea_model != 32)
4414 splat_hi = gen_reg_rtx (V4SImode);
4415 cache_tag_hi = gen_reg_rtx (V4SImode);
4416 tag_equal_hi = gen_reg_rtx (V4SImode);
4419 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4420 emit_move_insn (tag_arr, tag_arr_sym);
4421 v = 0x0001020300010203LL;
4422 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4423 ea_addr_si = ea_addr;
4424 if (spu_ea_model != 32)
4425 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4427 /* tag_index = ea_addr & (tag_array_size - 128) */
4428 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4430 /* splat ea_addr to all 4 slots. */
4431 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4432 /* Similarly for high 32 bits of ea_addr. */
4433 if (spu_ea_model != 32)
4434 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4436 /* block_off = ea_addr & 127 */
4437 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4439 /* tag_addr = tag_arr + tag_index */
4440 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4442 /* Read cache tags. */
4443 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4444 if (spu_ea_model != 32)
4445 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4446 plus_constant (tag_addr, 16)));
4448 /* tag = ea_addr & -128 */
4449 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4451 /* Read all four cache data pointers. */
4452 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4453 plus_constant (tag_addr, 32)));
4456 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4457 if (spu_ea_model != 32)
4459 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4460 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4463 /* At most one of the tags compare equal, so tag_equal has one
4464 32-bit slot set to all 1's, with the other slots all zero.
4465 gbb picks off low bit from each byte in the 128-bit registers,
4466 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4468 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4469 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4471 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4472 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4474 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4475 (rotating eq_index mod 16 bytes). */
4476 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4477 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4479 /* Add block offset to form final data address. */
4480 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4482 /* Check that we did hit. */
4483 hit_label = gen_label_rtx ();
4484 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4485 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4486 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4487 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4489 /* Say that this branch is very likely to happen. */
4490 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4491 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4493 ea_load_store (mem, is_store, ea_addr, data_addr);
4494 cont_label = gen_label_rtx ();
4495 emit_jump_insn (gen_jump (cont_label));
4498 emit_label (hit_label);
4503 rtx dirty_bits = gen_reg_rtx (TImode);
4504 rtx dirty_off = gen_reg_rtx (SImode);
4505 rtx dirty_128 = gen_reg_rtx (TImode);
4506 rtx neg_block_off = gen_reg_rtx (SImode);
4508 /* Set up mask with one dirty bit per byte of the mem we are
4509 writing, starting from top bit. */
4511 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4512 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4517 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4519 /* Form index into cache dirty_bits. eq_index is one of
4520 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4521 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4522 offset to each of the four dirty_bits elements. */
4523 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4525 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4527 /* Rotate bit mask to proper bit. */
4528 emit_insn (gen_negsi2 (neg_block_off, block_off));
4529 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4530 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4532 /* Or in the new dirty bits. */
4533 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4536 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4539 emit_label (cont_label);
4543 expand_ea_mem (rtx mem, bool is_store)
4546 rtx data_addr = gen_reg_rtx (Pmode);
4549 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4550 if (optimize_size || optimize == 0)
4551 ea_load_store (mem, is_store, ea_addr, data_addr);
4553 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4555 if (ea_alias_set == -1)
4556 ea_alias_set = new_alias_set ();
4558 /* We generate a new MEM RTX to refer to the copy of the data
4559 in the cache. We do not copy memory attributes (except the
4560 alignment) from the original MEM, as they may no longer apply
4561 to the cache copy. */
4562 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4563 set_mem_alias_set (new_mem, ea_alias_set);
4564 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4570 spu_expand_mov (rtx * ops, enum machine_mode mode)
4572 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4575 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4577 rtx from = SUBREG_REG (ops[1]);
4578 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4580 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4581 && GET_MODE_CLASS (imode) == MODE_INT
4582 && subreg_lowpart_p (ops[1]));
4584 if (GET_MODE_SIZE (imode) < 4)
4586 if (imode != GET_MODE (from))
4587 from = gen_rtx_SUBREG (imode, from, 0);
4589 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4591 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4592 emit_insn (GEN_FCN (icode) (ops[0], from));
4595 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4599 /* At least one of the operands needs to be a register. */
4600 if ((reload_in_progress | reload_completed) == 0
4601 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4603 rtx temp = force_reg (mode, ops[1]);
4604 emit_move_insn (ops[0], temp);
4607 if (reload_in_progress || reload_completed)
4609 if (CONSTANT_P (ops[1]))
4610 return spu_split_immediate (ops);
4614 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4616 if (GET_CODE (ops[1]) == CONST_INT)
4618 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4619 if (val != INTVAL (ops[1]))
4621 emit_move_insn (ops[0], GEN_INT (val));
4627 if (MEM_ADDR_SPACE (ops[0]))
4628 ops[0] = expand_ea_mem (ops[0], true);
4629 return spu_split_store (ops);
4633 if (MEM_ADDR_SPACE (ops[1]))
4634 ops[1] = expand_ea_mem (ops[1], false);
4635 return spu_split_load (ops);
4642 spu_convert_move (rtx dst, rtx src)
4644 enum machine_mode mode = GET_MODE (dst);
4645 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4647 gcc_assert (GET_MODE (src) == TImode);
4648 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4649 emit_insn (gen_rtx_SET (VOIDmode, reg,
4650 gen_rtx_TRUNCATE (int_mode,
4651 gen_rtx_LSHIFTRT (TImode, src,
4652 GEN_INT (int_mode == DImode ? 64 : 96)))));
4653 if (int_mode != mode)
4655 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4656 emit_move_insn (dst, reg);
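/* Editor's note (illustrative): the LSHIFTRT/TRUNCATE pair above pulls the
   scalar out of its preferred slot in the TImode register: shifting right
   by 96 bits brings bytes 0-3 down to the low end (SImode and narrower
   destinations), 64 bits brings bytes 0-7 down (DImode), and the TRUNCATE
   then keeps only the destination-sized low part.  */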
4660 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4661 the address from SRC and SRC+16. Return a REG or CONST_INT that
4662 specifies how many bytes to rotate the loaded registers, plus any
4663 extra from EXTRA_ROTQBY. The address and rotate amounts are
4664 normalized to improve merging of loads and rotate computations. */
4666 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4668 rtx addr = XEXP (src, 0);
4669 rtx p0, p1, rot, addr0, addr1;
4675 if (MEM_ALIGN (src) >= 128)
4676 /* Address is already aligned; simply perform a TImode load. */ ;
4677 else if (GET_CODE (addr) == PLUS)
4680 aligned reg + aligned reg => lqx
4681 aligned reg + unaligned reg => lqx, rotqby
4682 aligned reg + aligned const => lqd
4683 aligned reg + unaligned const => lqd, rotqbyi
4684 unaligned reg + aligned reg => lqx, rotqby
4685 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4686 unaligned reg + aligned const => lqd, rotqby
4687 unaligned reg + unaligned const -> not allowed by legitimate address
4689 p0 = XEXP (addr, 0);
4690 p1 = XEXP (addr, 1);
4691 if (!reg_aligned_for_addr (p0))
4693 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4695 rot = gen_reg_rtx (SImode);
4696 emit_insn (gen_addsi3 (rot, p0, p1));
4698 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4702 && INTVAL (p1) * BITS_PER_UNIT
4703 < REGNO_POINTER_ALIGN (REGNO (p0)))
4705 rot = gen_reg_rtx (SImode);
4706 emit_insn (gen_addsi3 (rot, p0, p1));
4711 rtx x = gen_reg_rtx (SImode);
4712 emit_move_insn (x, p1);
4713 if (!spu_arith_operand (p1, SImode))
4715 rot = gen_reg_rtx (SImode);
4716 emit_insn (gen_addsi3 (rot, p0, p1));
4717 addr = gen_rtx_PLUS (Pmode, p0, x);
4725 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4727 rot_amt = INTVAL (p1) & 15;
4728 if (INTVAL (p1) & -16)
4730 p1 = GEN_INT (INTVAL (p1) & -16);
4731 addr = gen_rtx_PLUS (SImode, p0, p1);
4736 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4740 else if (REG_P (addr))
4742 if (!reg_aligned_for_addr (addr))
4745 else if (GET_CODE (addr) == CONST)
4747 if (GET_CODE (XEXP (addr, 0)) == PLUS
4748 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4749 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4751 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4753 addr = gen_rtx_CONST (Pmode,
4754 gen_rtx_PLUS (Pmode,
4755 XEXP (XEXP (addr, 0), 0),
4756 GEN_INT (rot_amt & -16)));
4758 addr = XEXP (XEXP (addr, 0), 0);
4762 rot = gen_reg_rtx (Pmode);
4763 emit_move_insn (rot, addr);
4766 else if (GET_CODE (addr) == CONST_INT)
4768 rot_amt = INTVAL (addr);
4769 addr = GEN_INT (rot_amt & -16);
4771 else if (!ALIGNED_SYMBOL_REF_P (addr))
4773 rot = gen_reg_rtx (Pmode);
4774 emit_move_insn (rot, addr);
4777 rot_amt += extra_rotby;
4783 rtx x = gen_reg_rtx (SImode);
4784 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4788 if (!rot && rot_amt)
4789 rot = GEN_INT (rot_amt);
4791 addr0 = copy_rtx (addr);
4792 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4793 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4797 addr1 = plus_constant (copy_rtx (addr), 16);
4798 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4799 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4806 spu_split_load (rtx * ops)
4808 enum machine_mode mode = GET_MODE (ops[0]);
4809 rtx addr, load, rot;
4812 if (GET_MODE_SIZE (mode) >= 16)
4815 addr = XEXP (ops[1], 0);
4816 gcc_assert (GET_CODE (addr) != AND);
4818 if (!address_needs_split (ops[1]))
4820 ops[1] = change_address (ops[1], TImode, addr);
4821 load = gen_reg_rtx (TImode);
4822 emit_insn (gen__movti (load, ops[1]));
4823 spu_convert_move (ops[0], load);
4827 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4829 load = gen_reg_rtx (TImode);
4830 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4833 emit_insn (gen_rotqby_ti (load, load, rot));
4835 spu_convert_move (ops[0], load);
4840 spu_split_store (rtx * ops)
4842 enum machine_mode mode = GET_MODE (ops[0]);
4844 rtx addr, p0, p1, p1_lo, smem;
4848 if (GET_MODE_SIZE (mode) >= 16)
4851 addr = XEXP (ops[0], 0);
4852 gcc_assert (GET_CODE (addr) != AND);
4854 if (!address_needs_split (ops[0]))
4856 reg = gen_reg_rtx (TImode);
4857 emit_insn (gen_spu_convert (reg, ops[1]));
4858 ops[0] = change_address (ops[0], TImode, addr);
4859 emit_move_insn (ops[0], reg);
4863 if (GET_CODE (addr) == PLUS)
4866 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4867 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4868 aligned reg + aligned const => lqd, c?d, shuf, stqx
4869 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4870 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4871 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4872 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4873 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4876 p0 = XEXP (addr, 0);
4877 p1 = p1_lo = XEXP (addr, 1);
4878 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4880 p1_lo = GEN_INT (INTVAL (p1) & 15);
4881 if (reg_aligned_for_addr (p0))
4883 p1 = GEN_INT (INTVAL (p1) & -16);
4884 if (p1 == const0_rtx)
4887 addr = gen_rtx_PLUS (SImode, p0, p1);
4891 rtx x = gen_reg_rtx (SImode);
4892 emit_move_insn (x, p1);
4893 addr = gen_rtx_PLUS (SImode, p0, x);
4897 else if (REG_P (addr))
4901 p1 = p1_lo = const0_rtx;
4906 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4907 p1 = 0; /* aform doesn't use p1 */
4909 if (ALIGNED_SYMBOL_REF_P (addr))
4911 else if (GET_CODE (addr) == CONST
4912 && GET_CODE (XEXP (addr, 0)) == PLUS
4913 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4914 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4916 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4918 addr = gen_rtx_CONST (Pmode,
4919 gen_rtx_PLUS (Pmode,
4920 XEXP (XEXP (addr, 0), 0),
4921 GEN_INT (v & -16)));
4923 addr = XEXP (XEXP (addr, 0), 0);
4924 p1_lo = GEN_INT (v & 15);
4926 else if (GET_CODE (addr) == CONST_INT)
4928 p1_lo = GEN_INT (INTVAL (addr) & 15);
4929 addr = GEN_INT (INTVAL (addr) & -16);
4933 p1_lo = gen_reg_rtx (SImode);
4934 emit_move_insn (p1_lo, addr);
4938 reg = gen_reg_rtx (TImode);
4940 scalar = store_with_one_insn_p (ops[0]);
4943 /* We could copy the flags from the ops[0] MEM to lmem here, but
4944 we don't, because we want this load to be optimized away if
4945 possible, and copying the flags will prevent that in certain
4946 cases, e.g. consider the volatile flag. */
4948 rtx pat = gen_reg_rtx (TImode);
4949 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4950 set_mem_alias_set (lmem, 0);
4951 emit_insn (gen_movti (reg, lmem));
4953 if (!p0 || reg_aligned_for_addr (p0))
4954 p0 = stack_pointer_rtx;
4958 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4959 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4963 if (GET_CODE (ops[1]) == REG)
4964 emit_insn (gen_spu_convert (reg, ops[1]));
4965 else if (GET_CODE (ops[1]) == SUBREG)
4966 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4971 if (GET_MODE_SIZE (mode) < 4 && scalar)
4972 emit_insn (gen_ashlti3
4973 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4975 smem = change_address (ops[0], TImode, copy_rtx (addr));
4976 /* We can't use the previous alias set because the memory has changed
4977 size and can potentially overlap objects of other types. */
4978 set_mem_alias_set (smem, 0);
4980 emit_insn (gen_movti (smem, reg));
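/* Illustrative example (editor addition, register names are placeholders):
   when store_with_one_insn_p is false, an SImode store through a pointer
   in $a expands along the lines of the table earlier in this function:

       lqd    $t, 0($a)           # load the containing quadword
       cwd    $pat, 0($a)         # control word for a 4-byte insertion
       shufb  $t, $val, $t, $pat  # merge the new word into the quadword
       stqd   $t, 0($a)           # write the quadword back

   i.e. a read-modify-write of the aligned 16-byte block.  */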
4984 /* Return TRUE if X is a MEM which is a struct member reference
4985 and the member can safely be loaded and stored with a single
4986 instruction because it is padded. */
4988 mem_is_padded_component_ref (rtx x)
4990 tree t = MEM_EXPR (x);
4992 if (!t || TREE_CODE (t) != COMPONENT_REF)
4994 t = TREE_OPERAND (t, 1);
4995 if (!t || TREE_CODE (t) != FIELD_DECL
4996 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4998 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4999 r = DECL_FIELD_CONTEXT (t);
5000 if (!r || TREE_CODE (r) != RECORD_TYPE)
5002 /* Make sure they are the same mode */
5003 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5005 /* If there are no following fields then the field alignment assures
5006 the structure is padded to the alignment which means this field is
5008 if (TREE_CHAIN (t) == 0)
5010 /* If the following field is also aligned then this field will be
5013 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
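/* Illustrative example (editor addition): given

       struct s {
         int a __attribute__ ((aligned (16)));
         int b __attribute__ ((aligned (16)));
       };

   both members pass the checks above: each field is 128-bit aligned, 'a'
   is followed by another 128-bit aligned field, and 'b' is the last field
   of a structure that is padded out to its alignment, so either member
   can be accessed with a single quadword load or store.  */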
5018 /* Parse the -mfixed-range= option string. */
5020 fix_range (const char *const_str)
5023 char *str, *dash, *comma;
5025 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5026 REG2 are either register names or register numbers. The effect
5027 of this option is to mark the registers in the range from REG1 to
5028 REG2 as ``fixed'' so they won't be used by the compiler. */
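/* Usage example (editor addition, assuming the SPU register names
   $0..$127; decode_reg_name also accepts plain register numbers):
   -mfixed-range=$112-$127 marks registers 112 through 127 as fixed and
   call-used so the compiler will never allocate them.  */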
5030 i = strlen (const_str);
5031 str = (char *) alloca (i + 1);
5032 memcpy (str, const_str, i + 1);
5036 dash = strchr (str, '-');
5039 warning (0, "value of -mfixed-range must have form REG1-REG2");
5043 comma = strchr (dash + 1, ',');
5047 first = decode_reg_name (str);
5050 warning (0, "unknown register name: %s", str);
5054 last = decode_reg_name (dash + 1);
5057 warning (0, "unknown register name: %s", dash + 1);
5065 warning (0, "%s-%s is an empty range", str, dash + 1);
5069 for (i = first; i <= last; ++i)
5070 fixed_regs[i] = call_used_regs[i] = 1;
5080 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5081 can be generated using the fsmbi instruction. */
5083 fsmbi_const_p (rtx x)
5087 /* We can always choose TImode for CONST_INT because the high bits
5088 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5089 enum immediate_class c = classify_immediate (x, TImode);
5090 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
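/* Editor's note (illustrative): fsmbi expands a 16-bit immediate into a
   16-byte mask, one immediate bit per byte; e.g. an immediate of 0xff00
   yields eight 0xff bytes followed by eight 0x00 bytes.  IC_FSMBI thus
   covers constants whose bytes are all 0x00 or 0xff, and IC_FSMBI2
   (editor's reading) covers constants reachable with an fsmbi plus one
   extra instruction, presumably rejected once the epilogue is complete
   because splitting it would need a fresh register.  */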
5095 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5096 can be generated using the cbd, chd, cwd or cdd instruction. */
5098 cpat_const_p (rtx x, enum machine_mode mode)
5102 enum immediate_class c = classify_immediate (x, mode);
5103 return c == IC_CPAT;
5109 gen_cpat_const (rtx * ops)
5111 unsigned char dst[16];
5112 int i, offset, shift, isize;
5113 if (GET_CODE (ops[3]) != CONST_INT
5114 || GET_CODE (ops[2]) != CONST_INT
5115 || (GET_CODE (ops[1]) != CONST_INT
5116 && GET_CODE (ops[1]) != REG))
5118 if (GET_CODE (ops[1]) == REG
5119 && (!REG_POINTER (ops[1])
5120 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5123 for (i = 0; i < 16; i++)
5125 isize = INTVAL (ops[3]);
5128 else if (isize == 2)
5132 offset = (INTVAL (ops[2]) +
5133 (GET_CODE (ops[1]) ==
5134 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5135 for (i = 0; i < isize; i++)
5136 dst[offset + i] = i + shift;
5137 return array_to_constant (TImode, dst);
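/* Worked example (editor addition; assumes the elided initialization
   fills dst[] with 16..31, i.e. "take the byte from the second shufb
   operand", and that shift is 0 for 4-byte data): for a 4-byte insertion
   at offset 4, dst[4..7] become 0..3, giving the control pattern

       10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f   (hex)

   which tells shufb to take bytes 4..7 of the result from the new value
   and every other byte from the original quadword.  */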
5140 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5141 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5142 than 16 bytes, the value is repeated across the rest of the array. */
5144 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5149 memset (arr, 0, 16);
5150 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5151 if (GET_CODE (x) == CONST_INT
5152 || (GET_CODE (x) == CONST_DOUBLE
5153 && (mode == SFmode || mode == DFmode)))
5155 gcc_assert (mode != VOIDmode && mode != BLKmode);
5157 if (GET_CODE (x) == CONST_DOUBLE)
5158 val = const_double_to_hwint (x);
5161 first = GET_MODE_SIZE (mode) - 1;
5162 for (i = first; i >= 0; i--)
5164 arr[i] = val & 0xff;
5167 /* Splat the constant across the whole array. */
5168 for (j = 0, i = first + 1; i < 16; i++)
5171 j = (j == first) ? 0 : j + 1;
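/* Worked example (editor addition; the elided loop body is assumed to
   copy arr[j] into arr[i]): with MODE == HImode and x == 0x1234 the
   first loop stores arr[1] = 0x34 and arr[0] = 0x12, and the splat loop
   then repeats that two-byte pattern, giving 12 34 12 34 ... 12 34.  */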
5174 else if (GET_CODE (x) == CONST_DOUBLE)
5176 val = CONST_DOUBLE_LOW (x);
5177 for (i = 15; i >= 8; i--)
5179 arr[i] = val & 0xff;
5182 val = CONST_DOUBLE_HIGH (x);
5183 for (i = 7; i >= 0; i--)
5185 arr[i] = val & 0xff;
5189 else if (GET_CODE (x) == CONST_VECTOR)
5193 mode = GET_MODE_INNER (mode);
5194 units = CONST_VECTOR_NUNITS (x);
5195 for (i = 0; i < units; i++)
5197 elt = CONST_VECTOR_ELT (x, i);
5198 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5200 if (GET_CODE (elt) == CONST_DOUBLE)
5201 val = const_double_to_hwint (elt);
5204 first = GET_MODE_SIZE (mode) - 1;
5205 if (first + i * GET_MODE_SIZE (mode) > 16)
5207 for (j = first; j >= 0; j--)
5209 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5219 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5220 smaller than 16 bytes, use the bytes that would represent that value
5221 in a register, e.g., for QImode return the value of arr[3]. */
5223 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5225 enum machine_mode inner_mode;
5227 int units, size, i, j, k;
5230 if (GET_MODE_CLASS (mode) == MODE_INT
5231 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5233 j = GET_MODE_SIZE (mode);
5234 i = j < 4 ? 4 - j : 0;
5235 for (val = 0; i < j; i++)
5236 val = (val << 8) | arr[i];
5237 val = trunc_int_for_mode (val, mode);
5238 return GEN_INT (val);
5244 for (i = high = 0; i < 8; i++)
5245 high = (high << 8) | arr[i];
5246 for (i = 8, val = 0; i < 16; i++)
5247 val = (val << 8) | arr[i];
5248 return immed_double_const (val, high, TImode);
5252 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5253 val = trunc_int_for_mode (val, SImode);
5254 return hwint_to_const_double (SFmode, val);
5258 for (i = 0, val = 0; i < 8; i++)
5259 val = (val << 8) | arr[i];
5260 return hwint_to_const_double (DFmode, val);
5263 if (!VECTOR_MODE_P (mode))
5266 units = GET_MODE_NUNITS (mode);
5267 size = GET_MODE_UNIT_SIZE (mode);
5268 inner_mode = GET_MODE_INNER (mode);
5269 v = rtvec_alloc (units);
5271 for (k = i = 0; i < units; ++i)
5274 for (j = 0; j < size; j++, k++)
5275 val = (val << 8) | arr[k];
5277 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5278 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5280 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5285 return gen_rtx_CONST_VECTOR (mode, v);
5289 reloc_diagnostic (rtx x)
5292 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5295 if (GET_CODE (x) == SYMBOL_REF)
5296 decl = SYMBOL_REF_DECL (x);
5297 else if (GET_CODE (x) == CONST
5298 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5299 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5301 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5302 if (decl && !DECL_P (decl))
5305 /* The decl could be a string constant. */
5306 if (decl && DECL_P (decl))
5309 /* We use last_assemble_variable_decl to get line information. It's
5310 not always going to be right and might not even be close, but it will
5311 be right for the more common cases. */
5312 if (!last_assemble_variable_decl || in_section == ctors_section)
5313 loc = DECL_SOURCE_LOCATION (decl);
5315 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5317 if (TARGET_WARN_RELOC)
5319 "creating run-time relocation for %qD", decl);
5322 "creating run-time relocation for %qD", decl);
5326 if (TARGET_WARN_RELOC)
5327 warning_at (input_location, 0, "creating run-time relocation");
5329 error_at (input_location, "creating run-time relocation");
5333 /* Hook into assemble_integer so we can generate an error for run-time
5334 relocations. The SPU ABI disallows them. */
5336 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5338 /* By default run-time relocations aren't supported, but we allow them
5339 in case users support them in their own run-time loader, and we provide
5340 a warning for those users that don't. */
5341 if ((GET_CODE (x) == SYMBOL_REF)
5342 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5343 reloc_diagnostic (x);
5345 return default_assemble_integer (x, size, aligned_p);
5349 spu_asm_globalize_label (FILE * file, const char *name)
5351 fputs ("\t.global\t", file);
5352 assemble_name (file, name);
5357 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5358 bool speed ATTRIBUTE_UNUSED)
5360 enum machine_mode mode = GET_MODE (x);
5361 int cost = COSTS_N_INSNS (2);
5363 /* Folding to a CONST_VECTOR will use extra space but there might
5364 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5365 only if it allows us to fold away multiple insns. Changing the cost
5366 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5367 because this cost will only be compared against a single insn.
5368 if (code == CONST_VECTOR)
5369 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);  */
5372 /* Use defaults for float operations. Not accurate but good enough. */
5375 *total = COSTS_N_INSNS (13);
5380 *total = COSTS_N_INSNS (6);
5386 if (satisfies_constraint_K (x))
5388 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5389 *total = COSTS_N_INSNS (1);
5391 *total = COSTS_N_INSNS (3);
5395 *total = COSTS_N_INSNS (3);
5400 *total = COSTS_N_INSNS (0);
5404 *total = COSTS_N_INSNS (5);
5408 case FLOAT_TRUNCATE:
5410 case UNSIGNED_FLOAT:
5413 *total = COSTS_N_INSNS (7);
5419 *total = COSTS_N_INSNS (9);
5426 GET_CODE (XEXP (x, 0)) ==
5427 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5428 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5430 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5432 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5433 cost = COSTS_N_INSNS (14);
5434 if ((val & 0xffff) == 0)
5435 cost = COSTS_N_INSNS (9);
5436 else if (val > 0 && val < 0x10000)
5437 cost = COSTS_N_INSNS (11);
5446 *total = COSTS_N_INSNS (20);
5453 *total = COSTS_N_INSNS (4);
5456 if (XINT (x, 1) == UNSPEC_CONVERT)
5457 *total = COSTS_N_INSNS (0);
5459 *total = COSTS_N_INSNS (4);
5462 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5463 if (GET_MODE_CLASS (mode) == MODE_INT
5464 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5465 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5466 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5471 static enum machine_mode
5472 spu_unwind_word_mode (void)
5477 /* Decide whether we can make a sibling call to a function. DECL is the
5478 declaration of the function being targeted by the call and EXP is the
5479 CALL_EXPR representing the call. */
5481 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5483 return decl && !TARGET_LARGE_MEM;
5486 /* We need to correctly update the back chain pointer and the Available
5487 Stack Size (which is in the second slot of the sp register). */
5489 spu_allocate_stack (rtx op0, rtx op1)
5492 rtx chain = gen_reg_rtx (V4SImode);
5493 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5494 rtx sp = gen_reg_rtx (V4SImode);
5495 rtx splatted = gen_reg_rtx (V4SImode);
5496 rtx pat = gen_reg_rtx (TImode);
5498 /* copy the back chain so we can save it back again. */
5499 emit_move_insn (chain, stack_bot);
5501 op1 = force_reg (SImode, op1);
5503 v = 0x1020300010203ll;
5504 emit_move_insn (pat, immed_double_const (v, v, TImode));
5505 emit_insn (gen_shufb (splatted, op1, op1, pat));
5507 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5508 emit_insn (gen_subv4si3 (sp, sp, splatted));
5510 if (flag_stack_check)
5512 rtx avail = gen_reg_rtx(SImode);
5513 rtx result = gen_reg_rtx(SImode);
5514 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5515 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5516 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5519 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5521 emit_move_insn (stack_bot, chain);
5523 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5527 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5529 static unsigned char arr[16] =
5530 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5531 rtx temp = gen_reg_rtx (SImode);
5532 rtx temp2 = gen_reg_rtx (SImode);
5533 rtx temp3 = gen_reg_rtx (V4SImode);
5534 rtx temp4 = gen_reg_rtx (V4SImode);
5535 rtx pat = gen_reg_rtx (TImode);
5536 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5538 /* Restore the backchain from the first word, sp from the second. */
5539 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5540 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5542 emit_move_insn (pat, array_to_constant (TImode, arr));
5544 /* Compute Available Stack Size for sp */
5545 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5546 emit_insn (gen_shufb (temp3, temp, temp, pat));
5548 /* Compute Available Stack Size for back chain */
5549 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5550 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5551 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5553 emit_insn (gen_addv4si3 (sp, sp, temp3));
5554 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5558 spu_init_libfuncs (void)
5560 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5561 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5562 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5563 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5564 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5565 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5566 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5567 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5568 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5569 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5570 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5572 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5573 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5575 set_optab_libfunc (smul_optab, TImode, "__multi3");
5576 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5577 set_optab_libfunc (smod_optab, TImode, "__modti3");
5578 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5579 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5580 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5583 /* Make a subreg, stripping any existing subreg. We could possibly just
5584 call simplify_subreg, but in this case we know what we want. */
5586 spu_gen_subreg (enum machine_mode mode, rtx x)
5588 if (GET_CODE (x) == SUBREG)
5590 if (GET_MODE (x) == mode)
5592 return gen_rtx_SUBREG (mode, x, 0);
5596 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5598 return (TYPE_MODE (type) == BLKmode
5600 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5601 || int_size_in_bytes (type) >
5602 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5605 /* Create the built-in types and functions */
5607 enum spu_function_code
5609 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5610 #include "spu-builtins.def"
5615 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5617 struct spu_builtin_description spu_builtins[] = {
5618 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5619 {fcode, icode, name, type, params, NULL_TREE},
5620 #include "spu-builtins.def"
5624 /* Return the SPU builtin decl for CODE. */
5627 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5629 if (code >= NUM_SPU_BUILTINS)
5630 return error_mark_node;
5632 return spu_builtins[code].fndecl;
5637 spu_init_builtins (void)
5639 struct spu_builtin_description *d;
5642 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5643 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5644 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5645 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5646 V4SF_type_node = build_vector_type (float_type_node, 4);
5647 V2DF_type_node = build_vector_type (double_type_node, 2);
5649 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5650 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5651 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5652 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5654 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5656 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5657 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5658 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5659 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5660 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5661 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5662 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5663 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5664 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5665 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5666 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5667 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5669 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5670 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5671 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5672 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5673 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5674 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5675 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5676 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5678 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5679 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5681 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5683 spu_builtin_types[SPU_BTI_PTR] =
5684 build_pointer_type (build_qualified_type
5686 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5688 /* For each builtin we build a new prototype. The tree code will make
5689 sure nodes are shared. */
5690 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5693 char name[64]; /* add_builtin_function will make a copy. */
5699 /* Find last parm. */
5700 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5705 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5707 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5709 sprintf (name, "__builtin_%s", d->name);
5711 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5713 if (d->fcode == SPU_MASK_FOR_LOAD)
5714 TREE_READONLY (d->fndecl) = 1;
5716 /* These builtins don't throw. */
5717 TREE_NOTHROW (d->fndecl) = 1;
5722 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5724 static unsigned char arr[16] =
5725 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5727 rtx temp = gen_reg_rtx (Pmode);
5728 rtx temp2 = gen_reg_rtx (V4SImode);
5729 rtx temp3 = gen_reg_rtx (V4SImode);
5730 rtx pat = gen_reg_rtx (TImode);
5731 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5733 emit_move_insn (pat, array_to_constant (TImode, arr));
5735 /* Restore the sp. */
5736 emit_move_insn (temp, op1);
5737 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5739 /* Compute available stack size for sp. */
5740 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5741 emit_insn (gen_shufb (temp3, temp, temp, pat));
5743 emit_insn (gen_addv4si3 (sp, sp, temp3));
5744 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5748 spu_safe_dma (HOST_WIDE_INT channel)
5750 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5754 spu_builtin_splats (rtx ops[])
5756 enum machine_mode mode = GET_MODE (ops[0]);
5757 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5759 unsigned char arr[16];
5760 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5761 emit_move_insn (ops[0], array_to_constant (mode, arr));
5765 rtx reg = gen_reg_rtx (TImode);
5767 if (GET_CODE (ops[1]) != REG
5768 && GET_CODE (ops[1]) != SUBREG)
5769 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5775 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5781 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5786 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5791 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5797 emit_move_insn (reg, shuf);
5798 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
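/* Editor's note (illustrative): the four shuffle patterns above appear to
   correspond to the element sizes of MODE: the 0x0001020304050607 /
   0x1011121314151617 pattern copies bytes 0-7 of the source into both
   halves of the result (a doubleword splat for V2DI/V2DF), 00 01 02 03
   repeated splats the first word (V4SI/V4SF), 02 03 repeated splats the
   preferred-slot halfword (V8HI), and 03 repeated splats the
   preferred-slot byte (V16QI).  */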
5803 spu_builtin_extract (rtx ops[])
5805 enum machine_mode mode;
5808 mode = GET_MODE (ops[1]);
5810 if (GET_CODE (ops[2]) == CONST_INT)
5815 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5818 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5821 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5824 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5827 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5830 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5838 from = spu_gen_subreg (TImode, ops[1]);
5839 rot = gen_reg_rtx (TImode);
5840 tmp = gen_reg_rtx (SImode);
5845 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5848 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5849 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5853 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5857 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5862 emit_insn (gen_rotqby_ti (rot, from, tmp));
5864 emit_insn (gen_spu_convert (ops[0], rot));
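/* Editor's note (illustrative): for a variable index the arithmetic above
   computes the byte rotate amount that brings element N into the scalar
   preferred slot: N - 3 for byte elements (preferred slot is byte 3),
   2N - 2 for halfwords (bytes 2-3), 4N for words and 8N for doublewords
   (bytes 0-3 and 0-7 respectively).  */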
5868 spu_builtin_insert (rtx ops[])
5870 enum machine_mode mode = GET_MODE (ops[0]);
5871 enum machine_mode imode = GET_MODE_INNER (mode);
5872 rtx mask = gen_reg_rtx (TImode);
5875 if (GET_CODE (ops[3]) == CONST_INT)
5876 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5879 offset = gen_reg_rtx (SImode);
5880 emit_insn (gen_mulsi3
5881 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5884 (mask, stack_pointer_rtx, offset,
5885 GEN_INT (GET_MODE_SIZE (imode))));
5886 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5890 spu_builtin_promote (rtx ops[])
5892 enum machine_mode mode, imode;
5893 rtx rot, from, offset;
5896 mode = GET_MODE (ops[0]);
5897 imode = GET_MODE_INNER (mode);
5899 from = gen_reg_rtx (TImode);
5900 rot = spu_gen_subreg (TImode, ops[0]);
5902 emit_insn (gen_spu_convert (from, ops[1]));
5904 if (GET_CODE (ops[2]) == CONST_INT)
5906 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5907 if (GET_MODE_SIZE (imode) < 4)
5908 pos += 4 - GET_MODE_SIZE (imode);
5909 offset = GEN_INT (pos & 15);
5913 offset = gen_reg_rtx (SImode);
5917 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5920 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5921 emit_insn (gen_addsi3 (offset, offset, offset));
5925 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5926 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5930 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5936 emit_insn (gen_rotqby_ti (rot, from, offset));
5940 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5942 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5943 rtx shuf = gen_reg_rtx (V4SImode);
5944 rtx insn = gen_reg_rtx (V4SImode);
5949 fnaddr = force_reg (SImode, fnaddr);
5950 cxt = force_reg (SImode, cxt);
5952 if (TARGET_LARGE_MEM)
5954 rtx rotl = gen_reg_rtx (V4SImode);
5955 rtx mask = gen_reg_rtx (V4SImode);
5956 rtx bi = gen_reg_rtx (SImode);
5957 static unsigned char const shufa[16] = {
5958 2, 3, 0, 1, 18, 19, 16, 17,
5959 0, 1, 2, 3, 16, 17, 18, 19
5961 static unsigned char const insna[16] = {
5963 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5965 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5968 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5969 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5971 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5972 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5973 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5974 emit_insn (gen_selb (insn, insnc, rotl, mask));
5976 mem = adjust_address (m_tramp, V4SImode, 0);
5977 emit_move_insn (mem, insn);
5979 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5980 mem = adjust_address (m_tramp, Pmode, 16);
5981 emit_move_insn (mem, bi);
5985 rtx scxt = gen_reg_rtx (SImode);
5986 rtx sfnaddr = gen_reg_rtx (SImode);
5987 static unsigned char const insna[16] = {
5988 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5994 shufc = gen_reg_rtx (TImode);
5995 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5997 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5998 fits 18 bits and the last 4 are zeros. This will be true if
5999 the stack pointer is initialized to 0x3fff0 at program start,
6000 otherwise the ila instruction will be garbage. */
6002 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6003 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6005 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6006 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6007 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6009 mem = adjust_address (m_tramp, V4SImode, 0);
6010 emit_move_insn (mem, insn);
6012 emit_insn (gen_sync ());
6016 spu_expand_sign_extend (rtx ops[])
6018 unsigned char arr[16];
6019 rtx pat = gen_reg_rtx (TImode);
6022 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6023 if (GET_MODE (ops[1]) == QImode)
6025 sign = gen_reg_rtx (HImode);
6026 emit_insn (gen_extendqihi2 (sign, ops[1]));
6027 for (i = 0; i < 16; i++)
6033 for (i = 0; i < 16; i++)
6035 switch (GET_MODE (ops[1]))
6038 sign = gen_reg_rtx (SImode);
6039 emit_insn (gen_extendhisi2 (sign, ops[1]));
6041 arr[last - 1] = 0x02;
6044 sign = gen_reg_rtx (SImode);
6045 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6046 for (i = 0; i < 4; i++)
6047 arr[last - i] = 3 - i;
6050 sign = gen_reg_rtx (SImode);
6051 c = gen_reg_rtx (SImode);
6052 emit_insn (gen_spu_convert (c, ops[1]));
6053 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6054 for (i = 0; i < 8; i++)
6055 arr[last - i] = 7 - i;
6061 emit_move_insn (pat, array_to_constant (TImode, arr));
6062 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6065 /* Expand vector initialization. If there are any constant parts,
6066 load constant parts first. Then load any non-constant parts. */
6068 spu_expand_vector_init (rtx target, rtx vals)
6070 enum machine_mode mode = GET_MODE (target);
6071 int n_elts = GET_MODE_NUNITS (mode);
6073 bool all_same = true;
6074 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6077 first = XVECEXP (vals, 0, 0);
6078 for (i = 0; i < n_elts; ++i)
6080 x = XVECEXP (vals, 0, i);
6081 if (!(CONST_INT_P (x)
6082 || GET_CODE (x) == CONST_DOUBLE
6083 || GET_CODE (x) == CONST_FIXED))
6087 if (first_constant == NULL_RTX)
6090 if (i > 0 && !rtx_equal_p (x, first))
6094 /* if all elements are the same, use splats to repeat elements */
6097 if (!CONSTANT_P (first)
6098 && !register_operand (first, GET_MODE (x)))
6099 first = force_reg (GET_MODE (first), first);
6100 emit_insn (gen_spu_splats (target, first));
6104 /* load constant parts */
6105 if (n_var != n_elts)
6109 emit_move_insn (target,
6110 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6114 rtx constant_parts_rtx = copy_rtx (vals);
6116 gcc_assert (first_constant != NULL_RTX);
6117 /* Fill empty slots with the first constant; this increases
6118 our chance of using splats in the recursive call below. */
6119 for (i = 0; i < n_elts; ++i)
6121 x = XVECEXP (constant_parts_rtx, 0, i);
6122 if (!(CONST_INT_P (x)
6123 || GET_CODE (x) == CONST_DOUBLE
6124 || GET_CODE (x) == CONST_FIXED))
6125 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6128 spu_expand_vector_init (target, constant_parts_rtx);
6132 /* load variable parts */
6135 rtx insert_operands[4];
6137 insert_operands[0] = target;
6138 insert_operands[2] = target;
6139 for (i = 0; i < n_elts; ++i)
6141 x = XVECEXP (vals, 0, i);
6142 if (!(CONST_INT_P (x)
6143 || GET_CODE (x) == CONST_DOUBLE
6144 || GET_CODE (x) == CONST_FIXED))
6146 if (!register_operand (x, GET_MODE (x)))
6147 x = force_reg (GET_MODE (x), x);
6148 insert_operands[1] = x;
6149 insert_operands[3] = GEN_INT (i);
6150 spu_builtin_insert (insert_operands);
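/* Illustrative example (editor addition): for (vector int){x, 1, 2, 3}
   with a non-constant x, the constant pass above first materializes
   {1, 1, 2, 3} (empty slots are filled with the first constant to raise
   the odds of using a splat), and this loop then overwrites element 0
   with x via spu_builtin_insert.  */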
6156 /* Return the insn code of the vector compare instruction for the given
6157 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6160 get_vec_cmp_insn (enum rtx_code code,
6161 enum machine_mode dest_mode,
6162 enum machine_mode op_mode)
6168 if (dest_mode == V16QImode && op_mode == V16QImode)
6169 return CODE_FOR_ceq_v16qi;
6170 if (dest_mode == V8HImode && op_mode == V8HImode)
6171 return CODE_FOR_ceq_v8hi;
6172 if (dest_mode == V4SImode && op_mode == V4SImode)
6173 return CODE_FOR_ceq_v4si;
6174 if (dest_mode == V4SImode && op_mode == V4SFmode)
6175 return CODE_FOR_ceq_v4sf;
6176 if (dest_mode == V2DImode && op_mode == V2DFmode)
6177 return CODE_FOR_ceq_v2df;
6180 if (dest_mode == V16QImode && op_mode == V16QImode)
6181 return CODE_FOR_cgt_v16qi;
6182 if (dest_mode == V8HImode && op_mode == V8HImode)
6183 return CODE_FOR_cgt_v8hi;
6184 if (dest_mode == V4SImode && op_mode == V4SImode)
6185 return CODE_FOR_cgt_v4si;
6186 if (dest_mode == V4SImode && op_mode == V4SFmode)
6187 return CODE_FOR_cgt_v4sf;
6188 if (dest_mode == V2DImode && op_mode == V2DFmode)
6189 return CODE_FOR_cgt_v2df;
6192 if (dest_mode == V16QImode && op_mode == V16QImode)
6193 return CODE_FOR_clgt_v16qi;
6194 if (dest_mode == V8HImode && op_mode == V8HImode)
6195 return CODE_FOR_clgt_v8hi;
6196 if (dest_mode == V4SImode && op_mode == V4SImode)
6197 return CODE_FOR_clgt_v4si;
6205 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6206 DMODE is expected destination mode. This is a recursive function. */
6209 spu_emit_vector_compare (enum rtx_code rcode,
6211 enum machine_mode dmode)
6215 enum machine_mode dest_mode;
6216 enum machine_mode op_mode = GET_MODE (op1);
6218 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6220 /* Floating point vector compare instructions use destination V4SImode.
6221 Double floating point vector compare instructions use destination V2DImode.
6222 Move destination to appropriate mode later. */
6223 if (dmode == V4SFmode)
6224 dest_mode = V4SImode;
6225 else if (dmode == V2DFmode)
6226 dest_mode = V2DImode;
6230 mask = gen_reg_rtx (dest_mode);
6231 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6233 if (vec_cmp_insn == -1)
6235 bool swap_operands = false;
6236 bool try_again = false;
6241 swap_operands = true;
6246 swap_operands = true;
6250 /* Treat A != B as ~(A==B). */
6252 enum insn_code nor_code;
6253 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6254 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
6255 gcc_assert (nor_code != CODE_FOR_nothing);
6256 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6257 if (dmode != dest_mode)
6259 rtx temp = gen_reg_rtx (dest_mode);
6260 convert_move (temp, mask, 0);
6270 /* Try GT/GTU/LT/LTU OR EQ */
6273 enum insn_code ior_code;
6274 enum rtx_code new_code;
6278 case GE: new_code = GT; break;
6279 case GEU: new_code = GTU; break;
6280 case LE: new_code = LT; break;
6281 case LEU: new_code = LTU; break;
6286 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6287 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6289 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
6290 gcc_assert (ior_code != CODE_FOR_nothing);
6291 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6292 if (dmode != dest_mode)
6294 rtx temp = gen_reg_rtx (dest_mode);
6295 convert_move (temp, mask, 0);
6305 /* You only get two chances. */
6307 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6309 gcc_assert (vec_cmp_insn != -1);
6320 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6321 if (dmode != dest_mode)
6323 rtx temp = gen_reg_rtx (dest_mode);
6324 convert_move (temp, mask, 0);
6331 /* Emit vector conditional expression.
6332 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6333 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
6336 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6337 rtx cond, rtx cc_op0, rtx cc_op1)
6339 enum machine_mode dest_mode = GET_MODE (dest);
6340 enum rtx_code rcode = GET_CODE (cond);
6343 /* Get the vector mask for the given relational operation. */
6344 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6346 emit_insn(gen_selb (dest, op2, op1, mask));
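/* Editor's note: selb computes (ra & ~rc) | (rb & rc), so passing op2 as
   the first source and op1 as the second makes lanes where the mask is
   true take OP1 (the VEC_COND_EXPR "then" value) and the remaining lanes
   take OP2.  */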
6352 spu_force_reg (enum machine_mode mode, rtx op)
6355 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6357 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6358 || GET_MODE (op) == BLKmode)
6359 return force_reg (mode, convert_to_mode (mode, op, 0));
6363 r = force_reg (GET_MODE (op), op);
6364 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6366 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6371 x = gen_reg_rtx (mode);
6372 emit_insn (gen_spu_convert (x, r));
6377 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6379 HOST_WIDE_INT v = 0;
6381 /* Check the range of immediate operands. */
6382 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6384 int range = p - SPU_BTI_7;
6386 if (!CONSTANT_P (op))
6387 error ("%s expects an integer literal in the range [%d, %d].",
6389 spu_builtin_range[range].low, spu_builtin_range[range].high);
6391 if (GET_CODE (op) == CONST
6392 && (GET_CODE (XEXP (op, 0)) == PLUS
6393 || GET_CODE (XEXP (op, 0)) == MINUS))
6395 v = INTVAL (XEXP (XEXP (op, 0), 1));
6396 op = XEXP (XEXP (op, 0), 0);
6398 else if (GET_CODE (op) == CONST_INT)
6400 else if (GET_CODE (op) == CONST_VECTOR
6401 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6402 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6404 /* The default for v is 0 which is valid in every range. */
6405 if (v < spu_builtin_range[range].low
6406 || v > spu_builtin_range[range].high)
6407 error ("%s expects an integer literal in the range [%d, %d]. ("
6408 HOST_WIDE_INT_PRINT_DEC ")",
6410 spu_builtin_range[range].low, spu_builtin_range[range].high,
6419 /* This is only used in lqa and stqa. Even though the insns
6420 encode 16 bits of the address (all but the 2 least
6421 significant), only 14 bits are used because it is masked to
6422 be 16 byte aligned. */
6426 /* This is used for lqr and stqr. */
6433 if (GET_CODE (op) == LABEL_REF
6434 || (GET_CODE (op) == SYMBOL_REF
6435 && SYMBOL_REF_FUNCTION_P (op))
6436 || (v & ((1 << lsbits) - 1)) != 0)
6437 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6444 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6445 rtx target, rtx ops[])
6447 enum insn_code icode = (enum insn_code) d->icode;
6450 /* Expand the arguments into rtl. */
6452 if (d->parm[0] != SPU_BTI_VOID)
6455 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6457 tree arg = CALL_EXPR_ARG (exp, a);
6460 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6463 /* The insn pattern may have additional operands (SCRATCH).
6464 Return the number of actual non-SCRATCH operands. */
6465 gcc_assert (i <= insn_data[icode].n_operands);
6470 spu_expand_builtin_1 (struct spu_builtin_description *d,
6471 tree exp, rtx target)
6475 enum insn_code icode = (enum insn_code) d->icode;
6476 enum machine_mode mode, tmode;
6481 /* Set up ops[] with values from arglist. */
6482 n_operands = expand_builtin_args (d, exp, target, ops);
6484 /* Handle the target operand which must be operand 0. */
6486 if (d->parm[0] != SPU_BTI_VOID)
6489 /* We prefer the mode specified for the match_operand; otherwise
6490 use the mode from the builtin function prototype. */
6491 tmode = insn_data[d->icode].operand[0].mode;
6492 if (tmode == VOIDmode)
6493 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6495 /* Try to use target because not using it can lead to extra copies
6496 and when we are using all of the registers, extra copies lead
6498 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6501 target = ops[0] = gen_reg_rtx (tmode);
6503 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6509 if (d->fcode == SPU_MASK_FOR_LOAD)
6511 enum machine_mode mode = insn_data[icode].operand[1].mode;
6516 arg = CALL_EXPR_ARG (exp, 0);
6517 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6518 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6519 addr = memory_address (mode, op);
6522 op = gen_reg_rtx (GET_MODE (addr));
6523 emit_insn (gen_rtx_SET (VOIDmode, op,
6524 gen_rtx_NEG (GET_MODE (addr), addr)));
6525 op = gen_rtx_MEM (mode, op);
6527 pat = GEN_FCN (icode) (target, op);
6534 /* Ignore align_hint, but still expand its args in case they have side effects. */
6536 if (icode == CODE_FOR_spu_align_hint)
6539 /* Handle the rest of the operands. */
6540 for (p = 1; i < n_operands; i++, p++)
6542 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6543 mode = insn_data[d->icode].operand[i].mode;
6545 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6547 /* mode can be VOIDmode here for labels */
6549 /* For specific intrinsics with an immediate operand, e.g.,
6550 si_ai(), we sometimes need to convert the scalar argument to a
6551 vector argument by splatting the scalar. */
6552 if (VECTOR_MODE_P (mode)
6553 && (GET_CODE (ops[i]) == CONST_INT
6554 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6555 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6557 if (GET_CODE (ops[i]) == CONST_INT)
6558 ops[i] = spu_const (mode, INTVAL (ops[i]));
6561 rtx reg = gen_reg_rtx (mode);
6562 enum machine_mode imode = GET_MODE_INNER (mode);
6563 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6564 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6565 if (imode != GET_MODE (ops[i]))
6566 ops[i] = convert_to_mode (imode, ops[i],
6567 TYPE_UNSIGNED (spu_builtin_types
6569 emit_insn (gen_spu_splats (reg, ops[i]));
6574 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6576 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6577 ops[i] = spu_force_reg (mode, ops[i]);
6583 pat = GEN_FCN (icode) (0);
6586 pat = GEN_FCN (icode) (ops[0]);
6589 pat = GEN_FCN (icode) (ops[0], ops[1]);
6592 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6595 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6598 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6601 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6610 if (d->type == B_CALL || d->type == B_BISLED)
6611 emit_call_insn (pat);
6612 else if (d->type == B_JUMP)
6614 emit_jump_insn (pat);
6620 return_type = spu_builtin_types[d->parm[0]];
6621 if (d->parm[0] != SPU_BTI_VOID
6622 && GET_MODE (target) != TYPE_MODE (return_type))
6624 /* target is the return value. It should always have the mode of
6625 the builtin function prototype. */
6626 target = spu_force_reg (TYPE_MODE (return_type), target);
6633 spu_expand_builtin (tree exp,
6635 rtx subtarget ATTRIBUTE_UNUSED,
6636 enum machine_mode mode ATTRIBUTE_UNUSED,
6637 int ignore ATTRIBUTE_UNUSED)
6639 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6640 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6641 struct spu_builtin_description *d;
6643 if (fcode < NUM_SPU_BUILTINS)
6645 d = &spu_builtins[fcode];
6647 return spu_expand_builtin_1 (d, exp, target);
6652 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6654 spu_builtin_mul_widen_even (tree type)
6656 switch (TYPE_MODE (type))
6659 if (TYPE_UNSIGNED (type))
6660 return spu_builtins[SPU_MULE_0].fndecl;
6662 return spu_builtins[SPU_MULE_1].fndecl;
6669 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6671 spu_builtin_mul_widen_odd (tree type)
6673 switch (TYPE_MODE (type))
6676 if (TYPE_UNSIGNED (type))
6677 return spu_builtins[SPU_MULO_1].fndecl;
6679 return spu_builtins[SPU_MULO_0].fndecl;
6686 /* Implement targetm.vectorize.builtin_mask_for_load. */
6688 spu_builtin_mask_for_load (void)
6690 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6695 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6697 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6698 tree vectype ATTRIBUTE_UNUSED,
6699 int misalign ATTRIBUTE_UNUSED)
6701 switch (type_of_cost)
6709 case cond_branch_not_taken:
6717 /* Load + rotate. */
6720 case unaligned_load:
6723 case cond_branch_taken:
6731 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6732 after applying N iterations. This routine does not determine
6733 how many iterations are required to reach the desired alignment. */
6736 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6741 /* All other types are naturally aligned. */
6745 /* Implement targetm.vectorize.builtin_vec_perm. */
6747 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6749 struct spu_builtin_description *d;
6751 *mask_element_type = unsigned_char_type_node;
6753 switch (TYPE_MODE (type))
6756 if (TYPE_UNSIGNED (type))
6757 d = &spu_builtins[SPU_SHUFFLE_0];
6759 d = &spu_builtins[SPU_SHUFFLE_1];
6763 if (TYPE_UNSIGNED (type))
6764 d = &spu_builtins[SPU_SHUFFLE_2];
6766 d = &spu_builtins[SPU_SHUFFLE_3];
6770 if (TYPE_UNSIGNED (type))
6771 d = &spu_builtins[SPU_SHUFFLE_4];
6773 d = &spu_builtins[SPU_SHUFFLE_5];
6777 if (TYPE_UNSIGNED (type))
6778 d = &spu_builtins[SPU_SHUFFLE_6];
6780 d = &spu_builtins[SPU_SHUFFLE_7];
6784 d = &spu_builtins[SPU_SHUFFLE_8];
6788 d = &spu_builtins[SPU_SHUFFLE_9];
6799 /* Return the appropriate mode for a named address pointer. */
6800 static enum machine_mode
6801 spu_addr_space_pointer_mode (addr_space_t addrspace)
6805 case ADDR_SPACE_GENERIC:
6814 /* Return the appropriate mode for a named address address. */
6815 static enum machine_mode
6816 spu_addr_space_address_mode (addr_space_t addrspace)
6820 case ADDR_SPACE_GENERIC:
6829 /* Determine if one named address space is a subset of another. */
6832 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6834 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6835 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6837 if (subset == superset)
6840 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6841 being subsets but instead as disjoint address spaces. */
6842 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6846 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6849 /* Convert from one address space to another. */
6851 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6853 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6854 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6856 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6857 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6859 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6863 ls = gen_const_mem (DImode,
6864 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6865 set_mem_align (ls, 128);
6867 result = gen_reg_rtx (Pmode);
6868 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6869 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6870 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6871 ls, const0_rtx, Pmode, 1);
6873 emit_insn (gen_subsi3 (result, op, ls));
6878 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6882 ls = gen_const_mem (DImode,
6883 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6884 set_mem_align (ls, 128);
6886 result = gen_reg_rtx (EAmode);
6887 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6888 op = force_reg (Pmode, op);
6889 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6890 ls, const0_rtx, EAmode, 1);
6891 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6893 if (EAmode == SImode)
6894 emit_insn (gen_addsi3 (result, op, ls));
6896 emit_insn (gen_adddi3 (result, op, ls));
6906 /* Count the total number of instructions in each pipe and return the
6907 maximum, which is used as the Minimum Iteration Interval (MII)
6908 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6909 Instructions for which it returns -2 can go in either pipe0 or pipe1. */
6911 spu_sms_res_mii (struct ddg *g)
6914 unsigned t[4] = {0, 0, 0, 0};
6916 for (i = 0; i < g->num_nodes; i++)
6918 rtx insn = g->nodes[i].insn;
6919 int p = get_pipe (insn) + 2;
6925 if (dump_file && INSN_P (insn))
6926 fprintf (dump_file, "i%d %s %d %d\n",
6928 insn_data[INSN_CODE(insn)].name,
6932 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6934 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6939 spu_init_expanders (void)
6944 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6945 frame_pointer_needed is true. We don't know that until we're
6946 expanding the prologue. */
6947 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6949 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6950 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6951 to be treated as aligned, so generate them here. */
6952 r0 = gen_reg_rtx (SImode);
6953 r1 = gen_reg_rtx (SImode);
6954 mark_reg_pointer (r0, 128);
6955 mark_reg_pointer (r1, 128);
6956 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6957 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6961 static enum machine_mode
6962 spu_libgcc_cmp_return_mode (void)
6965 /* For SPU, word_mode is TImode, so it is better to use SImode
6966 for compare returns. */
6970 static enum machine_mode
6971 spu_libgcc_shift_count_mode (void)
6973 /* For SPU, word_mode is TImode, so it is better to use SImode
6974 for shift counts. */
6978 /* An early place to adjust some flags after GCC has finished processing command-line options. */
6981 asm_file_start (void)
6983 /* Variable tracking should be run after all optimizations which
6984 change order of insns. It also needs a valid CFG. */
6985 spu_flag_var_tracking = flag_var_tracking;
6986 flag_var_tracking = 0;
6988 default_file_start ();
6991 /* Implement targetm.section_type_flags. */
6993 spu_section_type_flags (tree decl, const char *name, int reloc)
6995 /* .toe needs to have type @nobits. */
6996 if (strcmp (name, ".toe") == 0)
6998 /* Don't load _ea into the current address space. */
6999 if (strcmp (name, "._ea") == 0)
7000 return SECTION_WRITE | SECTION_DEBUG;
7001 return default_section_type_flags (decl, name, reloc);
7004 /* Implement targetm.select_section. */
7006 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7008 /* Variables and constants defined in the __ea address space
7009 go into a special section named "._ea". */
7010 if (TREE_TYPE (decl) != error_mark_node
7011 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7013 /* We might get called with string constants, but get_named_section
7014 doesn't like them as they are not DECLs. Also, we need to set
7015 flags in that case. */
7017 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7019 return get_named_section (decl, "._ea", reloc);
7022 return default_elf_select_section (decl, reloc, align);
7025 /* Implement targetm.unique_section. */
7027 spu_unique_section (tree decl, int reloc)
7029 /* We don't support unique section names in the __ea address space yet. */
7031 if (TREE_TYPE (decl) != error_mark_node
7032 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7035 default_unique_section (decl, reloc);
7038 /* Generate a constant or register which contains 2^SCALE. We assume
7039 the result is valid for MODE. Currently, MODE must be V4SFmode and
7040 SCALE must be SImode. */
7042 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7044 gcc_assert (mode == V4SFmode);
7045 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7046 if (GET_CODE (scale) != CONST_INT)
7048 /* unsigned int exp = (127 + scale) << 23;
7049 __vector float m = (__vector float) spu_splats (exp); */
7050 rtx reg = force_reg (SImode, scale);
7051 rtx exp = gen_reg_rtx (SImode);
7052 rtx mul = gen_reg_rtx (mode);
7053 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7054 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7055 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7060 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7061 unsigned char arr[16];
7062 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7063 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7064 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7065 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7066 return array_to_constant (mode, arr);
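/* Worked example (editor addition): for scale == 3, exp = 127 + 3 = 0x82,
   so every word of the array is 41 00 00 00, i.e. the IEEE single
   0x41000000 == 8.0 == 2^3, splatted across the V4SF constant.  */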
7070 /* After reload, just change the convert into a move instruction
7071 or a dead instruction. */
7073 spu_split_convert (rtx ops[])
7075 if (REGNO (ops[0]) == REGNO (ops[1]))
7076 emit_note (NOTE_INSN_DELETED);
7079 /* Use TImode always as this might help hard reg copyprop. */
7080 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7081 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7082 emit_insn (gen_move_insn (op0, op1));
7087 spu_function_profiler (FILE * file, int labelno)
7089 fprintf (file, "# profile\n");
7090 fprintf (file, "brsl $75, _mcount\n");