1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
36 #include "basic-block.h"
37 #include "integrate.h"
38 #include "diagnostic-core.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
53 #include "tm-constrs.h"
59 /* Builtin types, data and prototypes. */
61 enum spu_builtin_type_index
63 SPU_BTI_END_OF_PARAMS,
65 /* We create new type nodes for these. */
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
80 /* These all correspond to intSI_type_node */
94 /* These correspond to the standard types */
114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
125 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
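/* Inclusive [low, high] ranges for the immediate operands accepted by each
   SPU_BTI_* argument class below, used to validate immediate arguments to
   the SPU builtins. */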
127 struct spu_builtin_range
132 static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
148 /* Target specific attribute specifications. */
149 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
151 /* Prototypes and external defs. */
152 static void spu_option_override (void);
153 static void spu_option_optimization (int, int);
154 static void spu_option_default_params (void);
155 static void spu_init_builtins (void);
156 static tree spu_builtin_decl (unsigned, bool);
157 static bool spu_scalar_mode_supported_p (enum machine_mode mode);
158 static bool spu_vector_mode_supported_p (enum machine_mode mode);
159 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
160 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
162 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
163 static rtx get_pic_reg (void);
164 static int need_to_save_reg (int regno, int saving);
165 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
166 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
167 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
169 static void emit_nop_for_insn (rtx insn);
170 static bool insn_clobbers_hbr (rtx insn);
171 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
172 int distance, sbitmap blocks);
173 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
174 enum machine_mode dmode);
175 static rtx get_branch_target (rtx branch);
176 static void spu_machine_dependent_reorg (void);
177 static int spu_sched_issue_rate (void);
178 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
180 static int get_pipe (rtx insn);
181 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
182 static void spu_sched_init_global (FILE *, int, int);
183 static void spu_sched_init (FILE *, int, int);
184 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
185 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
188 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
191 static int spu_naked_function_p (tree func);
192 static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194 static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
196 static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
197 const_tree type, bool named);
198 static tree spu_build_builtin_va_list (void);
199 static void spu_va_start (tree, rtx);
200 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
201 gimple_seq * pre_p, gimple_seq * post_p);
202 static int store_with_one_insn_p (rtx mem);
203 static int mem_is_padded_component_ref (rtx x);
204 static int reg_aligned_for_addr (rtx x);
205 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
206 static void spu_asm_globalize_label (FILE * file, const char *name);
207 static bool spu_rtx_costs (rtx x, int code, int outer_code,
208 int *total, bool speed);
209 static bool spu_function_ok_for_sibcall (tree decl, tree exp);
210 static void spu_init_libfuncs (void);
211 static bool spu_return_in_memory (const_tree type, const_tree fntype);
212 static void fix_range (const char *);
213 static void spu_encode_section_info (tree, rtx, int);
214 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
215 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
217 static tree spu_builtin_mul_widen_even (tree);
218 static tree spu_builtin_mul_widen_odd (tree);
219 static tree spu_builtin_mask_for_load (void);
220 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
221 static bool spu_vector_alignment_reachable (const_tree, bool);
222 static tree spu_builtin_vec_perm (tree, tree *);
223 static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
224 static enum machine_mode spu_addr_space_address_mode (addr_space_t);
225 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
226 static rtx spu_addr_space_convert (rtx, tree, tree);
227 static int spu_sms_res_mii (struct ddg *g);
228 static void asm_file_start (void);
229 static unsigned int spu_section_type_flags (tree, const char *, int);
230 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
231 static void spu_unique_section (tree, int);
232 static rtx spu_expand_load (rtx, rtx, rtx, int);
233 static void spu_trampoline_init (rtx, tree, rtx);
235 /* Which instruction set architecture to use. */
237 /* Which cpu are we tuning for. */
240 /* The hardware requires 8 insns between a hint and the branch it
241 affects.  This variable describes how many rtl instructions the
242 compiler needs to see before inserting a hint, and then the compiler
243 will insert enough nops to make it at least 8 insns.  The default is
244 for the compiler to allow up to 2 nops to be emitted.  The nops are
245 inserted in pairs, so we round down. */
246 int spu_hint_dist = (8*4) - (2*4);
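/* With the default of 2 nops this is 8*4 - 2*4 == 24 bytes, i.e. 6 insns. */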
248 /* Determines whether we run variable tracking in machine dependent reorg. */
250 static int spu_flag_var_tracking;
265 IC_POOL, /* constant pool */
266 IC_IL1, /* one il* instruction */
267 IC_IL2, /* both ilhu and iohl instructions */
268 IC_IL1s, /* one il* instruction */
269 IC_IL2s, /* both ilhu and iohl instructions */
270 IC_FSMBI, /* the fsmbi instruction */
271 IC_CPAT, /* one of the c*d instructions */
272 IC_FSMBI2 /* fsmbi plus 1 other instruction */
275 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
276 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
277 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
278 static enum immediate_class classify_immediate (rtx op,
279 enum machine_mode mode);
281 static enum machine_mode spu_unwind_word_mode (void);
283 static enum machine_mode
284 spu_libgcc_cmp_return_mode (void);
286 static enum machine_mode
287 spu_libgcc_shift_count_mode (void);
289 /* Pointer mode for __ea references. */
290 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
293 /* Table of machine attributes. */
294 static const struct attribute_spec spu_attribute_table[] =
296 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
297 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
299 { NULL, 0, 0, false, false, false, NULL }
302 /* TARGET overrides. */
304 #undef TARGET_ADDR_SPACE_POINTER_MODE
305 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
307 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
308 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
310 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
311 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
312 spu_addr_space_legitimate_address_p
314 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
315 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
317 #undef TARGET_ADDR_SPACE_SUBSET_P
318 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
320 #undef TARGET_ADDR_SPACE_CONVERT
321 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
323 #undef TARGET_INIT_BUILTINS
324 #define TARGET_INIT_BUILTINS spu_init_builtins
325 #undef TARGET_BUILTIN_DECL
326 #define TARGET_BUILTIN_DECL spu_builtin_decl
328 #undef TARGET_EXPAND_BUILTIN
329 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
331 #undef TARGET_UNWIND_WORD_MODE
332 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
334 #undef TARGET_LEGITIMIZE_ADDRESS
335 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
337 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
338 and .quad for the debugger. When it is known that the assembler is fixed,
339 these can be removed. */
340 #undef TARGET_ASM_UNALIGNED_SI_OP
341 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
343 #undef TARGET_ASM_ALIGNED_DI_OP
344 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
346 /* The .8byte directive doesn't seem to work well for a 32 bit architecture. */
348 #undef TARGET_ASM_UNALIGNED_DI_OP
349 #define TARGET_ASM_UNALIGNED_DI_OP NULL
351 #undef TARGET_RTX_COSTS
352 #define TARGET_RTX_COSTS spu_rtx_costs
354 #undef TARGET_ADDRESS_COST
355 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
357 #undef TARGET_SCHED_ISSUE_RATE
358 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
360 #undef TARGET_SCHED_INIT_GLOBAL
361 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
363 #undef TARGET_SCHED_INIT
364 #define TARGET_SCHED_INIT spu_sched_init
366 #undef TARGET_SCHED_VARIABLE_ISSUE
367 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
369 #undef TARGET_SCHED_REORDER
370 #define TARGET_SCHED_REORDER spu_sched_reorder
372 #undef TARGET_SCHED_REORDER2
373 #define TARGET_SCHED_REORDER2 spu_sched_reorder
375 #undef TARGET_SCHED_ADJUST_COST
376 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
378 #undef TARGET_ATTRIBUTE_TABLE
379 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
381 #undef TARGET_ASM_INTEGER
382 #define TARGET_ASM_INTEGER spu_assemble_integer
384 #undef TARGET_SCALAR_MODE_SUPPORTED_P
385 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
387 #undef TARGET_VECTOR_MODE_SUPPORTED_P
388 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
390 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
391 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
393 #undef TARGET_ASM_GLOBALIZE_LABEL
394 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
396 #undef TARGET_PASS_BY_REFERENCE
397 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
399 #undef TARGET_FUNCTION_ARG
400 #define TARGET_FUNCTION_ARG spu_function_arg
402 #undef TARGET_FUNCTION_ARG_ADVANCE
403 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
405 #undef TARGET_MUST_PASS_IN_STACK
406 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
408 #undef TARGET_BUILD_BUILTIN_VA_LIST
409 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
411 #undef TARGET_EXPAND_BUILTIN_VA_START
412 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
414 #undef TARGET_SETUP_INCOMING_VARARGS
415 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
417 #undef TARGET_MACHINE_DEPENDENT_REORG
418 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
420 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
421 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
423 #undef TARGET_DEFAULT_TARGET_FLAGS
424 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
426 #undef TARGET_INIT_LIBFUNCS
427 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
429 #undef TARGET_RETURN_IN_MEMORY
430 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
432 #undef TARGET_ENCODE_SECTION_INFO
433 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
435 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
436 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
438 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
439 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
441 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
442 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
444 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
445 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
447 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
448 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
450 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
451 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
453 #undef TARGET_LIBGCC_CMP_RETURN_MODE
454 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
456 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
457 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
459 #undef TARGET_SCHED_SMS_RES_MII
460 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
462 #undef TARGET_ASM_FILE_START
463 #define TARGET_ASM_FILE_START asm_file_start
465 #undef TARGET_SECTION_TYPE_FLAGS
466 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
468 #undef TARGET_ASM_SELECT_SECTION
469 #define TARGET_ASM_SELECT_SECTION spu_select_section
471 #undef TARGET_ASM_UNIQUE_SECTION
472 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
474 #undef TARGET_LEGITIMATE_ADDRESS_P
475 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
477 #undef TARGET_TRAMPOLINE_INIT
478 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
480 #undef TARGET_OPTION_OVERRIDE
481 #define TARGET_OPTION_OVERRIDE spu_option_override
483 #undef TARGET_OPTION_OPTIMIZATION
484 #define TARGET_OPTION_OPTIMIZATION spu_option_optimization
486 #undef TARGET_OPTION_DEFAULT_PARAMS
487 #define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
489 #undef TARGET_EXCEPT_UNWIND_INFO
490 #define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
492 struct gcc_target targetm = TARGET_INITIALIZER;
495 spu_option_optimization (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
497 /* With so many registers this is better on by default. */
498 flag_rename_registers = 1;
501 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
503 spu_option_default_params (void)
505 /* Override some of the default param values. With so many registers
506 larger values are better for these params. */
507 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
510 /* Implement TARGET_OPTION_OVERRIDE. */
512 spu_option_override (void)
514 /* Small loops will be completely unrolled at -O3.  For SPU it is more
515 important to keep code small by default. */
516 if (!flag_unroll_loops && !flag_peel_loops)
517 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 1);
519 flag_omit_frame_pointer = 1;
521 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
522 if (align_functions < 8)
525 spu_hint_dist = 8*4 - spu_max_nops*4;
526 if (spu_hint_dist < 0)
529 if (spu_fixed_range_string)
530 fix_range (spu_fixed_range_string);
532 /* Determine processor architectural level. */
535 if (strcmp (&spu_arch_string[0], "cell") == 0)
536 spu_arch = PROCESSOR_CELL;
537 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
538 spu_arch = PROCESSOR_CELLEDP;
540 error ("Unknown architecture '%s'", &spu_arch_string[0]);
543 /* Determine processor to tune for. */
546 if (strcmp (&spu_tune_string[0], "cell") == 0)
547 spu_tune = PROCESSOR_CELL;
548 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
549 spu_tune = PROCESSOR_CELLEDP;
551 error ("Unknown architecture '%s' for -mtune", &spu_tune_string[0]);
554 /* Change defaults according to the processor architecture. */
555 if (spu_arch == PROCESSOR_CELLEDP)
557 /* If no command line option has been otherwise specified, change
558 the default to -mno-safe-hints on celledp -- only the original
559 Cell/B.E. processors require this workaround. */
560 if (!(target_flags_explicit & MASK_SAFE_HINTS))
561 target_flags &= ~MASK_SAFE_HINTS;
564 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
567 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
568 struct attribute_spec.handler. */
570 /* True if MODE is valid for the target. By "valid", we mean able to
571 be manipulated in non-trivial ways. In particular, this means all
572 the arithmetic is supported. */
574 spu_scalar_mode_supported_p (enum machine_mode mode)
592 /* Similarly for vector modes. "Supported" here is less strict. At
593 least some operations are supported; need to check optabs or builtins
594 for further details. */
596 spu_vector_mode_supported_p (enum machine_mode mode)
613 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
614 least significant bytes of the outer mode. This function returns
615 TRUE for the SUBREGs where this is correct. */
617 valid_subreg (rtx op)
619 enum machine_mode om = GET_MODE (op);
620 enum machine_mode im = GET_MODE (SUBREG_REG (op));
621 return om != VOIDmode && im != VOIDmode
622 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
623 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
624 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
627 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
628 and adjust the start offset. */
630 adjust_operand (rtx op, HOST_WIDE_INT * start)
632 enum machine_mode mode;
634 /* Strip any paradoxical SUBREG. */
635 if (GET_CODE (op) == SUBREG
636 && (GET_MODE_BITSIZE (GET_MODE (op))
637 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
641 GET_MODE_BITSIZE (GET_MODE (op)) -
642 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
643 op = SUBREG_REG (op);
645 /* If it is smaller than SI, ensure a SUBREG. */
646 op_size = GET_MODE_BITSIZE (GET_MODE (op));
650 *start += 32 - op_size;
653 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
654 mode = mode_for_size (op_size, MODE_INT, 0);
655 if (mode != GET_MODE (op))
656 op = gen_rtx_SUBREG (mode, op, 0);
661 spu_expand_extv (rtx ops[], int unsignedp)
663 rtx dst = ops[0], src = ops[1];
664 HOST_WIDE_INT width = INTVAL (ops[2]);
665 HOST_WIDE_INT start = INTVAL (ops[3]);
666 HOST_WIDE_INT align_mask;
667 rtx s0, s1, mask, r0;
669 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
673 /* First, determine if we need 1 TImode load or 2. We need only 1
674 if the bits being extracted do not cross the alignment boundary
675 as determined by the MEM and its address. */
677 align_mask = -MEM_ALIGN (src);
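/* For example, with a 128-bit aligned MEM the test below asks whether bit
   START and bit START + WIDTH - 1 lie in the same 16-byte block, in which
   case a single quadword load is enough. */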
678 if ((start & align_mask) == ((start + width - 1) & align_mask))
680 /* Alignment is sufficient for 1 load. */
681 s0 = gen_reg_rtx (TImode);
682 r0 = spu_expand_load (s0, 0, src, start / 8);
685 emit_insn (gen_rotqby_ti (s0, s0, r0));
690 s0 = gen_reg_rtx (TImode);
691 s1 = gen_reg_rtx (TImode);
692 r0 = spu_expand_load (s0, s1, src, start / 8);
695 gcc_assert (start + width <= 128);
698 rtx r1 = gen_reg_rtx (SImode);
699 mask = gen_reg_rtx (TImode);
700 emit_move_insn (mask, GEN_INT (-1));
701 emit_insn (gen_rotqby_ti (s0, s0, r0));
702 emit_insn (gen_rotqby_ti (s1, s1, r0));
703 if (GET_CODE (r0) == CONST_INT)
704 r1 = GEN_INT (INTVAL (r0) & 15);
706 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
707 emit_insn (gen_shlqby_ti (mask, mask, r1));
708 emit_insn (gen_selb (s0, s1, s0, mask));
713 else if (GET_CODE (src) == SUBREG)
715 rtx r = SUBREG_REG (src);
716 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
717 s0 = gen_reg_rtx (TImode);
718 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
719 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
721 emit_move_insn (s0, src);
725 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
726 s0 = gen_reg_rtx (TImode);
727 emit_move_insn (s0, src);
730 /* Now s0 is TImode and contains the bits to extract at start. */
733 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
737 tree c = build_int_cst (NULL_TREE, 128 - width);
738 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
741 emit_move_insn (dst, s0);
745 spu_expand_insv (rtx ops[])
747 HOST_WIDE_INT width = INTVAL (ops[1]);
748 HOST_WIDE_INT start = INTVAL (ops[2]);
749 HOST_WIDE_INT maskbits;
750 enum machine_mode dst_mode, src_mode;
751 rtx dst = ops[0], src = ops[3];
752 int dst_size, src_size;
758 if (GET_CODE (ops[0]) == MEM)
759 dst = gen_reg_rtx (TImode);
761 dst = adjust_operand (dst, &start);
762 dst_mode = GET_MODE (dst);
763 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
765 if (CONSTANT_P (src))
767 enum machine_mode m =
768 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
769 src = force_reg (m, convert_to_mode (m, src, 0));
771 src = adjust_operand (src, 0);
772 src_mode = GET_MODE (src);
773 src_size = GET_MODE_BITSIZE (GET_MODE (src));
775 mask = gen_reg_rtx (dst_mode);
776 shift_reg = gen_reg_rtx (dst_mode);
777 shift = dst_size - start - width;
779 /* It's not safe to use subreg here because the compiler assumes
780 that the SUBREG_REG is right justified in the SUBREG. */
781 convert_move (shift_reg, src, 1);
788 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
791 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
794 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
806 maskbits = (-1ll << (32 - width - start));
808 maskbits += (1ll << (32 - start));
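/* For example, START == 8 and WIDTH == 16 (START counting from the MSB)
   yield the SImode mask 0x00ffff00, covering exactly the inserted field. */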
809 emit_move_insn (mask, GEN_INT (maskbits));
812 maskbits = (-1ll << (64 - width - start));
814 maskbits += (1ll << (64 - start));
815 emit_move_insn (mask, GEN_INT (maskbits));
819 unsigned char arr[16];
821 memset (arr, 0, sizeof (arr));
822 arr[i] = 0xff >> (start & 7);
823 for (i++; i <= (start + width - 1) / 8; i++)
825 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
826 emit_move_insn (mask, array_to_constant (TImode, arr));
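/* For a MEM destination, do a read-modify-write of the quadword containing
   the field: rotate the shifted source and the mask to the byte offset
   within the quadword, merge with selb, and store the result back.  When
   the field crosses a 16-byte boundary, the following quadword is updated
   the same way. */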
832 if (GET_CODE (ops[0]) == MEM)
834 rtx low = gen_reg_rtx (SImode);
835 rtx rotl = gen_reg_rtx (SImode);
836 rtx mask0 = gen_reg_rtx (TImode);
842 addr = force_reg (Pmode, XEXP (ops[0], 0));
843 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
844 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
845 emit_insn (gen_negsi2 (rotl, low));
846 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
847 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
848 mem = change_address (ops[0], TImode, addr0);
849 set_mem_alias_set (mem, 0);
850 emit_move_insn (dst, mem);
851 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
852 if (start + width > MEM_ALIGN (ops[0]))
854 rtx shl = gen_reg_rtx (SImode);
855 rtx mask1 = gen_reg_rtx (TImode);
856 rtx dst1 = gen_reg_rtx (TImode);
858 addr1 = plus_constant (addr, 16);
859 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
860 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
861 emit_insn (gen_shlqby_ti (mask1, mask, shl));
862 mem1 = change_address (ops[0], TImode, addr1);
863 set_mem_alias_set (mem1, 0);
864 emit_move_insn (dst1, mem1);
865 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
866 emit_move_insn (mem1, dst1);
868 emit_move_insn (mem, dst);
871 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
876 spu_expand_block_move (rtx ops[])
878 HOST_WIDE_INT bytes, align, offset;
879 rtx src, dst, sreg, dreg, target;
881 if (GET_CODE (ops[2]) != CONST_INT
882 || GET_CODE (ops[3]) != CONST_INT
883 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
886 bytes = INTVAL (ops[2]);
887 align = INTVAL (ops[3]);
897 for (offset = 0; offset + 16 <= bytes; offset += 16)
899 dst = adjust_address (ops[0], V16QImode, offset);
900 src = adjust_address (ops[1], V16QImode, offset);
901 emit_move_insn (dst, src);
906 unsigned char arr[16] = { 0 };
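/* Build a selb mask whose first BYTES - OFFSET bytes are 0xff, so the final
   partial quadword takes those bytes from the source and leaves the rest of
   the destination unchanged. */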
907 for (i = 0; i < bytes - offset; i++)
909 dst = adjust_address (ops[0], V16QImode, offset);
910 src = adjust_address (ops[1], V16QImode, offset);
911 mask = gen_reg_rtx (V16QImode);
912 sreg = gen_reg_rtx (V16QImode);
913 dreg = gen_reg_rtx (V16QImode);
914 target = gen_reg_rtx (V16QImode);
915 emit_move_insn (mask, array_to_constant (V16QImode, arr));
916 emit_move_insn (dreg, dst);
917 emit_move_insn (sreg, src);
918 emit_insn (gen_selb (target, dreg, sreg, mask));
919 emit_move_insn (dst, target);
927 { SPU_EQ, SPU_GT, SPU_GTU };
929 int spu_comp_icode[12][3] = {
930 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
931 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
932 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
933 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
934 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
935 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
936 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
937 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
938 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
939 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
940 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
941 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
944 /* Generate a compare for CODE. Return a brand-new rtx that represents
945 the result of the compare. GCC can figure this out too if we don't
946 provide all variations of compares, but since GCC always wants to use
947 WORD_MODE, we can generate better code in most cases if we do it
950 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
952 int reverse_compare = 0;
953 int reverse_test = 0;
954 rtx compare_result, eq_result;
955 rtx comp_rtx, eq_rtx;
956 enum machine_mode comp_mode;
957 enum machine_mode op_mode;
958 enum spu_comp_code scode, eq_code;
959 enum insn_code ior_code;
960 enum rtx_code code = GET_CODE (cmp);
961 rtx op0 = XEXP (cmp, 0);
962 rtx op1 = XEXP (cmp, 1);
966 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
967 and so on, to keep the constant in operand 1. */
968 if (GET_CODE (op1) == CONST_INT)
970 HOST_WIDE_INT val = INTVAL (op1) - 1;
971 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
996 op_mode = GET_MODE (op0);
1002 if (HONOR_NANS (op_mode))
1004 reverse_compare = 0;
1011 reverse_compare = 1;
1017 if (HONOR_NANS (op_mode))
1019 reverse_compare = 1;
1026 reverse_compare = 0;
1031 reverse_compare = 1;
1036 reverse_compare = 1;
1041 reverse_compare = 0;
1046 reverse_compare = 1;
1051 reverse_compare = 0;
1097 comp_mode = op_mode;
1101 comp_mode = op_mode;
1105 comp_mode = op_mode;
1109 comp_mode = V4SImode;
1113 comp_mode = V2DImode;
1120 if (GET_MODE (op1) == DFmode
1121 && (scode != SPU_GT && scode != SPU_EQ))
1124 if (is_set == 0 && op1 == const0_rtx
1125 && (GET_MODE (op0) == SImode
1126 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1128 /* Don't need to set a register with the result when we are
1129 comparing against zero and branching. */
1130 reverse_test = !reverse_test;
1131 compare_result = op0;
1135 compare_result = gen_reg_rtx (comp_mode);
1137 if (reverse_compare)
1144 if (spu_comp_icode[index][scode] == 0)
1147 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1149 op0 = force_reg (op_mode, op0);
1150 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1152 op1 = force_reg (op_mode, op1);
1153 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1157 emit_insn (comp_rtx);
1161 eq_result = gen_reg_rtx (comp_mode);
1162 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1167 ior_code = optab_handler (ior_optab, comp_mode);
1168 gcc_assert (ior_code != CODE_FOR_nothing);
1169 emit_insn (GEN_FCN (ior_code)
1170 (compare_result, compare_result, eq_result));
1179 /* We don't have branch on QI compare insns, so we convert the
1180 QI compare result to a HI result. */
1181 if (comp_mode == QImode)
1183 rtx old_res = compare_result;
1184 compare_result = gen_reg_rtx (HImode);
1186 emit_insn (gen_extendqihi2 (compare_result, old_res));
1190 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1192 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1194 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1195 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1196 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1199 else if (is_set == 2)
1201 rtx target = operands[0];
1202 int compare_size = GET_MODE_BITSIZE (comp_mode);
1203 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1204 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1206 rtx op_t = operands[2];
1207 rtx op_f = operands[3];
1209 /* The result of the comparison can be SI, HI or QI mode. Create a
1210 mask based on that result. */
1211 if (target_size > compare_size)
1213 select_mask = gen_reg_rtx (mode);
1214 emit_insn (gen_extend_compare (select_mask, compare_result));
1216 else if (target_size < compare_size)
1218 gen_rtx_SUBREG (mode, compare_result,
1219 (compare_size - target_size) / BITS_PER_UNIT);
1220 else if (comp_mode != mode)
1221 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1223 select_mask = compare_result;
1225 if (GET_MODE (target) != GET_MODE (op_t)
1226 || GET_MODE (target) != GET_MODE (op_f))
1230 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1232 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1236 rtx target = operands[0];
1238 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1239 gen_rtx_NOT (comp_mode, compare_result)));
1240 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1241 emit_insn (gen_extendhisi2 (target, compare_result));
1242 else if (GET_MODE (target) == SImode
1243 && GET_MODE (compare_result) == QImode)
1244 emit_insn (gen_extend_compare (target, compare_result));
1246 emit_move_insn (target, compare_result);
1251 const_double_to_hwint (rtx x)
1255 if (GET_MODE (x) == SFmode)
1257 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1258 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1260 else if (GET_MODE (x) == DFmode)
1263 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1264 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1266 val = (val << 32) | (l[1] & 0xffffffff);
1274 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1278 gcc_assert (mode == SFmode || mode == DFmode);
1281 tv[0] = (v << 32) >> 32;
1282 else if (mode == DFmode)
1284 tv[1] = (v << 32) >> 32;
1287 real_from_target (&rv, tv, mode);
1288 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1292 print_operand_address (FILE * file, register rtx addr)
1297 if (GET_CODE (addr) == AND
1298 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1299 && INTVAL (XEXP (addr, 1)) == -16)
1300 addr = XEXP (addr, 0);
1302 switch (GET_CODE (addr))
1305 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1309 reg = XEXP (addr, 0);
1310 offset = XEXP (addr, 1);
1311 if (GET_CODE (offset) == REG)
1313 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1314 reg_names[REGNO (offset)]);
1316 else if (GET_CODE (offset) == CONST_INT)
1318 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1319 INTVAL (offset), reg_names[REGNO (reg)]);
1329 output_addr_const (file, addr);
1339 print_operand (FILE * file, rtx x, int code)
1341 enum machine_mode mode = GET_MODE (x);
1343 unsigned char arr[16];
1344 int xcode = GET_CODE (x);
1346 if (GET_MODE (x) == VOIDmode)
1349 case 'L': /* 128 bits, signed */
1350 case 'm': /* 128 bits, signed */
1351 case 'T': /* 128 bits, signed */
1352 case 't': /* 128 bits, signed */
1355 case 'K': /* 64 bits, signed */
1356 case 'k': /* 64 bits, signed */
1357 case 'D': /* 64 bits, signed */
1358 case 'd': /* 64 bits, signed */
1361 case 'J': /* 32 bits, signed */
1362 case 'j': /* 32 bits, signed */
1363 case 's': /* 32 bits, signed */
1364 case 'S': /* 32 bits, signed */
1371 case 'j': /* 32 bits, signed */
1372 case 'k': /* 64 bits, signed */
1373 case 'm': /* 128 bits, signed */
1374 if (xcode == CONST_INT
1375 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1377 gcc_assert (logical_immediate_p (x, mode));
1378 constant_to_array (mode, x, arr);
1379 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1380 val = trunc_int_for_mode (val, SImode);
1381 switch (which_logical_immediate (val))
1386 fprintf (file, "h");
1389 fprintf (file, "b");
1399 case 'J': /* 32 bits, signed */
1400 case 'K': /* 64 bits, signed */
1401 case 'L': /* 128 bits, signed */
1402 if (xcode == CONST_INT
1403 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1405 gcc_assert (logical_immediate_p (x, mode)
1406 || iohl_immediate_p (x, mode));
1407 constant_to_array (mode, x, arr);
1408 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1409 val = trunc_int_for_mode (val, SImode);
1410 switch (which_logical_immediate (val))
1416 val = trunc_int_for_mode (val, HImode);
1419 val = trunc_int_for_mode (val, QImode);
1424 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1430 case 't': /* 128 bits, signed */
1431 case 'd': /* 64 bits, signed */
1432 case 's': /* 32 bits, signed */
1435 enum immediate_class c = classify_immediate (x, mode);
1439 constant_to_array (mode, x, arr);
1440 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1441 val = trunc_int_for_mode (val, SImode);
1442 switch (which_immediate_load (val))
1447 fprintf (file, "a");
1450 fprintf (file, "h");
1453 fprintf (file, "hu");
1460 constant_to_array (mode, x, arr);
1461 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1463 fprintf (file, "b");
1465 fprintf (file, "h");
1467 fprintf (file, "w");
1469 fprintf (file, "d");
1472 if (xcode == CONST_VECTOR)
1474 x = CONST_VECTOR_ELT (x, 0);
1475 xcode = GET_CODE (x);
1477 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1478 fprintf (file, "a");
1479 else if (xcode == HIGH)
1480 fprintf (file, "hu");
1494 case 'T': /* 128 bits, signed */
1495 case 'D': /* 64 bits, signed */
1496 case 'S': /* 32 bits, signed */
1499 enum immediate_class c = classify_immediate (x, mode);
1503 constant_to_array (mode, x, arr);
1504 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1505 val = trunc_int_for_mode (val, SImode);
1506 switch (which_immediate_load (val))
1513 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1518 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1521 constant_to_array (mode, x, arr);
1523 for (i = 0; i < 16; i++)
1528 print_operand (file, GEN_INT (val), 0);
1531 constant_to_array (mode, x, arr);
1532 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1533 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1538 if (GET_CODE (x) == CONST_VECTOR)
1539 x = CONST_VECTOR_ELT (x, 0);
1540 output_addr_const (file, x);
1542 fprintf (file, "@h");
1556 if (xcode == CONST_INT)
1558 /* Only the 4 least significant bits are relevant for the generate
1559 control word instructions. */
1560 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1565 case 'M': /* print code for c*d */
1566 if (GET_CODE (x) == CONST_INT)
1570 fprintf (file, "b");
1573 fprintf (file, "h");
1576 fprintf (file, "w");
1579 fprintf (file, "d");
1588 case 'N': /* Negate the operand */
1589 if (xcode == CONST_INT)
1590 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1591 else if (xcode == CONST_VECTOR)
1592 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1593 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1596 case 'I': /* enable/disable interrupts */
1597 if (xcode == CONST_INT)
1598 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1601 case 'b': /* branch modifiers */
1603 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1604 else if (COMPARISON_P (x))
1605 fprintf (file, "%s", xcode == NE ? "n" : "");
1608 case 'i': /* indirect call */
1611 if (GET_CODE (XEXP (x, 0)) == REG)
1612 /* Used in indirect function calls. */
1613 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1615 output_address (XEXP (x, 0));
1619 case 'p': /* load/store */
1623 xcode = GET_CODE (x);
1628 xcode = GET_CODE (x);
1631 fprintf (file, "d");
1632 else if (xcode == CONST_INT)
1633 fprintf (file, "a");
1634 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1635 fprintf (file, "r");
1636 else if (xcode == PLUS || xcode == LO_SUM)
1638 if (GET_CODE (XEXP (x, 1)) == REG)
1639 fprintf (file, "x");
1641 fprintf (file, "d");
1646 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1648 output_addr_const (file, GEN_INT (val));
1652 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1654 output_addr_const (file, GEN_INT (val));
1658 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1660 output_addr_const (file, GEN_INT (val));
1664 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1665 val = (val >> 3) & 0x1f;
1666 output_addr_const (file, GEN_INT (val));
1670 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1673 output_addr_const (file, GEN_INT (val));
1677 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1680 output_addr_const (file, GEN_INT (val));
1684 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1687 output_addr_const (file, GEN_INT (val));
1691 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1692 val = -(val & -8ll);
1693 val = (val >> 3) & 0x1f;
1694 output_addr_const (file, GEN_INT (val));
1699 constant_to_array (mode, x, arr);
1700 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1701 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1706 fprintf (file, "%s", reg_names[REGNO (x)]);
1707 else if (xcode == MEM)
1708 output_address (XEXP (x, 0));
1709 else if (xcode == CONST_VECTOR)
1710 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1712 output_addr_const (file, x);
1719 output_operand_lossage ("invalid %%xn code");
1724 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1725 caller saved register. For leaf functions it is more efficient to
1726 use a volatile register because we won't need to save and restore the
1727 pic register. This routine is only valid after register allocation
1728 is completed, so we can pick an unused register. */
1732 rtx pic_reg = pic_offset_table_rtx;
1733 if (!reload_completed && !reload_in_progress)
1735 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1736 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1740 /* Split constant addresses to handle cases that are too large.
1741 Add in the pic register when in PIC mode.
1742 Split immediates that require more than 1 instruction. */
1744 spu_split_immediate (rtx * ops)
1746 enum machine_mode mode = GET_MODE (ops[0]);
1747 enum immediate_class c = classify_immediate (ops[1], mode);
1753 unsigned char arrhi[16];
1754 unsigned char arrlo[16];
1755 rtx to, temp, hi, lo;
1757 enum machine_mode imode = mode;
1758 /* We need to do reals as ints because the constant used in the
1759 IOR might not be a legitimate real constant. */
1760 imode = int_mode_for_mode (mode);
1761 constant_to_array (mode, ops[1], arrhi);
1763 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1766 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
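/* Split each word of the constant: ARRHI keeps the upper halfwords, loaded
   first (an ilhu pattern), and ARRLO keeps the lower halfwords, merged in
   with the IOR below (an iohl pattern). */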
1767 for (i = 0; i < 16; i += 4)
1769 arrlo[i + 2] = arrhi[i + 2];
1770 arrlo[i + 3] = arrhi[i + 3];
1771 arrlo[i + 0] = arrlo[i + 1] = 0;
1772 arrhi[i + 2] = arrhi[i + 3] = 0;
1774 hi = array_to_constant (imode, arrhi);
1775 lo = array_to_constant (imode, arrlo);
1776 emit_move_insn (temp, hi);
1777 emit_insn (gen_rtx_SET
1778 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1783 unsigned char arr_fsmbi[16];
1784 unsigned char arr_andbi[16];
1785 rtx to, reg_fsmbi, reg_and;
1787 enum machine_mode imode = mode;
1788 /* We need to do reals as ints because the constant used in the
1789 * AND might not be a legitimate real constant. */
1790 imode = int_mode_for_mode (mode);
1791 constant_to_array (mode, ops[1], arr_fsmbi);
1793 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
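/* Replace every nonzero byte with 0xff (an fsmbi pattern) and splat the
   common nonzero byte value into ARR_ANDBI; the AND emitted below then
   restores the original constant. */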
1796 for (i = 0; i < 16; i++)
1797 if (arr_fsmbi[i] != 0)
1799 arr_andbi[0] = arr_fsmbi[i];
1800 arr_fsmbi[i] = 0xff;
1802 for (i = 1; i < 16; i++)
1803 arr_andbi[i] = arr_andbi[0];
1804 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1805 reg_and = array_to_constant (imode, arr_andbi);
1806 emit_move_insn (to, reg_fsmbi);
1807 emit_insn (gen_rtx_SET
1808 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1812 if (reload_in_progress || reload_completed)
1814 rtx mem = force_const_mem (mode, ops[1]);
1815 if (TARGET_LARGE_MEM)
1817 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1818 emit_move_insn (addr, XEXP (mem, 0));
1819 mem = replace_equiv_address (mem, addr);
1821 emit_move_insn (ops[0], mem);
1827 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1831 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1832 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1835 emit_insn (gen_pic (ops[0], ops[1]));
1838 rtx pic_reg = get_pic_reg ();
1839 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1840 crtl->uses_pic_offset_table = 1;
1842 return flag_pic || c == IC_IL2s;
1853 /* SAVING is TRUE when we are generating the actual load and store
1854 instructions for REGNO.  When determining the size of the stack
1855 needed for saving registers we must allocate enough space for the
1856 worst case, because we don't always have the information early enough
1857 to avoid allocating it.  But we can at least eliminate the actual loads
1858 and stores during the prologue/epilogue. */
1860 need_to_save_reg (int regno, int saving)
1862 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1865 && regno == PIC_OFFSET_TABLE_REGNUM
1866 && (!saving || crtl->uses_pic_offset_table)
1868 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1873 /* This function is only correct starting with local register allocation. */
1876 spu_saved_regs_size (void)
1878 int reg_save_size = 0;
1881 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1882 if (need_to_save_reg (regno, 0))
1883 reg_save_size += 0x10;
1884 return reg_save_size;
1888 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1890 rtx reg = gen_rtx_REG (V4SImode, regno);
1892 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1893 return emit_insn (gen_movv4si (mem, reg));
1897 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1899 rtx reg = gen_rtx_REG (V4SImode, regno);
1901 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1902 return emit_insn (gen_movv4si (reg, mem));
1905 /* This happens after reload, so we need to expand it. */
1907 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1910 if (satisfies_constraint_K (GEN_INT (imm)))
1912 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
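/* Otherwise the immediate does not fit the add instruction's immediate
   field, so load it into SCRATCH first. */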
1916 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1917 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1918 if (REGNO (src) == REGNO (scratch))
1924 /* Return nonzero if this function is known to have a null epilogue. */
1927 direct_return (void)
1929 if (reload_completed)
1931 if (cfun->static_chain_decl == 0
1932 && (spu_saved_regs_size ()
1934 + crtl->outgoing_args_size
1935 + crtl->args.pretend_args_size == 0)
1936 && current_function_is_leaf)
1943 The stack frame looks like this:
1947 AP -> +-------------+
1950 prev SP | back chain |
1953 | reg save | crtl->args.pretend_args_size bytes
1956 | saved regs | spu_saved_regs_size() bytes
1957 FP -> +-------------+
1959 | vars | get_frame_size() bytes
1960 HFP -> +-------------+
1963 | args | crtl->outgoing_args_size bytes
1969 SP -> +-------------+
1973 spu_expand_prologue (void)
1975 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1976 HOST_WIDE_INT total_size;
1977 HOST_WIDE_INT saved_regs_size;
1978 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1979 rtx scratch_reg_0, scratch_reg_1;
1982 if (flag_pic && optimize == 0)
1983 crtl->uses_pic_offset_table = 1;
1985 if (spu_naked_function_p (current_function_decl))
1988 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1989 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1991 saved_regs_size = spu_saved_regs_size ();
1992 total_size = size + saved_regs_size
1993 + crtl->outgoing_args_size
1994 + crtl->args.pretend_args_size;
1996 if (!current_function_is_leaf
1997 || cfun->calls_alloca || total_size > 0)
1998 total_size += STACK_POINTER_OFFSET;
2000 /* Save this first because code after this might use the link
2001 register as a scratch register. */
2002 if (!current_function_is_leaf)
2004 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2005 RTX_FRAME_RELATED_P (insn) = 1;
2010 offset = -crtl->args.pretend_args_size;
2011 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2012 if (need_to_save_reg (regno, 1))
2015 insn = frame_emit_store (regno, sp_reg, offset);
2016 RTX_FRAME_RELATED_P (insn) = 1;
2020 if (flag_pic && crtl->uses_pic_offset_table)
2022 rtx pic_reg = get_pic_reg ();
2023 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2024 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2029 if (flag_stack_check)
2031 /* We compare against total_size-1 because
2032 ($sp >= total_size) <=> ($sp > total_size-1) */
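/* Word 1 of the stack pointer register holds the number of bytes of
   available stack space (per the SPU ABI), so compare that element and
   halt (heq) if the new frame would not fit. */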
2033 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2034 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2035 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2036 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2038 emit_move_insn (scratch_v4si, size_v4si);
2039 size_v4si = scratch_v4si;
2041 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2042 emit_insn (gen_vec_extractv4si
2043 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2044 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2047 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2048 the value of the previous $sp because we save it as the back chain. */
2050 if (total_size <= 2000)
2052 /* In this case we save the back chain first. */
2053 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2055 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2059 insn = emit_move_insn (scratch_reg_0, sp_reg);
2061 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2063 RTX_FRAME_RELATED_P (insn) = 1;
2064 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2065 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2067 if (total_size > 2000)
2069 /* Save the back chain ptr */
2070 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2073 if (frame_pointer_needed)
2075 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2076 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2077 + crtl->outgoing_args_size;
2078 /* Set the new frame_pointer */
2079 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2080 RTX_FRAME_RELATED_P (insn) = 1;
2081 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2082 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2083 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2090 spu_expand_epilogue (bool sibcall_p)
2092 int size = get_frame_size (), offset, regno;
2093 HOST_WIDE_INT saved_regs_size, total_size;
2094 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2095 rtx jump, scratch_reg_0;
2097 if (spu_naked_function_p (current_function_decl))
2100 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2102 saved_regs_size = spu_saved_regs_size ();
2103 total_size = size + saved_regs_size
2104 + crtl->outgoing_args_size
2105 + crtl->args.pretend_args_size;
2107 if (!current_function_is_leaf
2108 || cfun->calls_alloca || total_size > 0)
2109 total_size += STACK_POINTER_OFFSET;
2113 if (cfun->calls_alloca)
2114 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2116 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2119 if (saved_regs_size > 0)
2121 offset = -crtl->args.pretend_args_size;
2122 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2123 if (need_to_save_reg (regno, 1))
2126 frame_emit_load (regno, sp_reg, offset);
2131 if (!current_function_is_leaf)
2132 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2136 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2137 jump = emit_jump_insn (gen__return ());
2138 emit_barrier_after (jump);
2144 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2148 /* This is inefficient because it ends up copying to a save-register
2149 which then gets saved even though $lr has already been saved. But
2150 it does generate better code for leaf functions and we don't need
2151 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2152 used for __builtin_return_address anyway, so maybe we don't care if
2153 it's inefficient. */
2154 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2158 /* Given VAL, generate a constant appropriate for MODE.
2159 If MODE is a vector mode, every element will be VAL.
2160 For TImode, VAL will be zero extended to 128 bits. */
2162 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2168 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2169 || GET_MODE_CLASS (mode) == MODE_FLOAT
2170 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2171 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2173 if (GET_MODE_CLASS (mode) == MODE_INT)
2174 return immed_double_const (val, 0, mode);
2176 /* val is the bit representation of the float */
2177 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2178 return hwint_to_const_double (mode, val);
2180 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2181 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2183 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2185 units = GET_MODE_NUNITS (mode);
2187 v = rtvec_alloc (units);
2189 for (i = 0; i < units; ++i)
2190 RTVEC_ELT (v, i) = inner;
2192 return gen_rtx_CONST_VECTOR (mode, v);
2195 /* Create a MODE vector constant from 4 ints. */
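/* Each argument is stored big-endian: A occupies the first four bytes of
   the constant, D the last four. */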
2197 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2199 unsigned char arr[16];
2200 arr[0] = (a >> 24) & 0xff;
2201 arr[1] = (a >> 16) & 0xff;
2202 arr[2] = (a >> 8) & 0xff;
2203 arr[3] = (a >> 0) & 0xff;
2204 arr[4] = (b >> 24) & 0xff;
2205 arr[5] = (b >> 16) & 0xff;
2206 arr[6] = (b >> 8) & 0xff;
2207 arr[7] = (b >> 0) & 0xff;
2208 arr[8] = (c >> 24) & 0xff;
2209 arr[9] = (c >> 16) & 0xff;
2210 arr[10] = (c >> 8) & 0xff;
2211 arr[11] = (c >> 0) & 0xff;
2212 arr[12] = (d >> 24) & 0xff;
2213 arr[13] = (d >> 16) & 0xff;
2214 arr[14] = (d >> 8) & 0xff;
2215 arr[15] = (d >> 0) & 0xff;
2216 return array_to_constant(mode, arr);
2219 /* branch hint stuff */
2221 /* An array of these is used to propagate hints to predecessor blocks. */
2224 rtx prop_jump; /* propagated from another block */
2225 int bb_index; /* the original block. */
2227 static struct spu_bb_info *spu_bb_info;
2229 #define STOP_HINT_P(INSN) \
2230 (GET_CODE(INSN) == CALL_INSN \
2231 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2232 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2234 /* 1 when RTX is a hinted branch or its target. We keep track of
2235 what has been hinted so the safe-hint code can test it easily. */
2236 #define HINTED_P(RTX) \
2237 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2239 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2240 #define SCHED_ON_EVEN_P(RTX) \
2241 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2243 /* Emit a nop for INSN such that the two will dual issue. This assumes
2244 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2245 We check for TImode to handle a MULTI1 insn which has dual issued its
2246 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2249 emit_nop_for_insn (rtx insn)
2253 p = get_pipe (insn);
2254 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2255 new_insn = emit_insn_after (gen_lnop (), insn);
2256 else if (p == 1 && GET_MODE (insn) == TImode)
2258 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2259 PUT_MODE (new_insn, TImode);
2260 PUT_MODE (insn, VOIDmode);
2263 new_insn = emit_insn_after (gen_lnop (), insn);
2264 recog_memoized (new_insn);
2267 /* Insert nops in basic blocks to meet dual issue alignment
2268 requirements. Also make sure hbrp and hint instructions are at least
2269 one cycle apart, possibly inserting a nop. */
2273 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2277 /* This sets up INSN_ADDRESSES. */
2278 shorten_branches (get_insns ());
2280 /* Keep track of length added by nops. */
2284 insn = get_insns ();
2285 if (!active_insn_p (insn))
2286 insn = next_active_insn (insn);
2287 for (; insn; insn = next_insn)
2289 next_insn = next_active_insn (insn);
2290 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2291 || INSN_CODE (insn) == CODE_FOR_hbr)
2295 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2296 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2297 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2300 prev_insn = emit_insn_before (gen_lnop (), insn);
2301 PUT_MODE (prev_insn, GET_MODE (insn));
2302 PUT_MODE (insn, TImode);
2308 if (INSN_CODE (insn) == CODE_FOR_blockage)
2310 if (GET_MODE (insn) == TImode)
2311 PUT_MODE (next_insn, TImode);
2313 next_insn = next_active_insn (insn);
2315 addr = INSN_ADDRESSES (INSN_UID (insn));
2316 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2318 if (((addr + length) & 7) != 0)
2320 emit_nop_for_insn (prev_insn);
2324 else if (GET_MODE (insn) == TImode
2325 && ((next_insn && GET_MODE (next_insn) != TImode)
2326 || get_attr_type (insn) == TYPE_MULTI0)
2327 && ((addr + length) & 7) != 0)
2329 /* prev_insn will always be set because the first insn is
2330 always 8-byte aligned. */
2331 emit_nop_for_insn (prev_insn);
2339 /* Routines for branch hints. */
2342 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2343 int distance, sbitmap blocks)
2345 rtx branch_label = 0;
2350 if (before == 0 || branch == 0 || target == 0)
2353 /* While scheduling we require hints to be no further than 600, so
2354 we need to enforce that here too */
2358 /* If we are at a basic block note, emit the hint after the note. */
2359 if (NOTE_INSN_BASIC_BLOCK_P (before))
2360 before = NEXT_INSN (before);
2362 branch_label = gen_label_rtx ();
2363 LABEL_NUSES (branch_label)++;
2364 LABEL_PRESERVE_P (branch_label) = 1;
2365 insn = emit_label_before (branch_label, branch);
2366 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2367 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2369 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2370 recog_memoized (hint);
2371 HINTED_P (branch) = 1;
2373 if (GET_CODE (target) == LABEL_REF)
2374 HINTED_P (XEXP (target, 0)) = 1;
2375 else if (tablejump_p (branch, 0, &table))
2379 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2380 vec = XVEC (PATTERN (table), 0);
2382 vec = XVEC (PATTERN (table), 1);
2383 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2384 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2387 if (distance >= 588)
2389 /* Make sure the hint isn't scheduled any earlier than this point,
2390 which could make it too far for the branch offset to fit */
2391 recog_memoized (emit_insn_before (gen_blockage (), hint));
2393 else if (distance <= 8 * 4)
2395 /* To guarantee at least 8 insns between the hint and branch we
2398 for (d = distance; d < 8 * 4; d += 4)
2401 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2402 recog_memoized (insn);
2405 /* Make sure any nops inserted aren't scheduled before the hint. */
2406 recog_memoized (emit_insn_after (gen_blockage (), hint));
2408 /* Make sure any nops inserted aren't scheduled after the call. */
2409 if (CALL_P (branch) && distance < 8 * 4)
2410 recog_memoized (emit_insn_before (gen_blockage (), branch));
2414 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2415 the rtx for the branch target. */
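/* For illustration (a rough sketch of the cases handled below, not an
   exhaustive specification): a conditional jump yields the LABEL_REF of
   the arm judged more likely, an indirect jump yields the register
   holding its target, and a return or call yields the link register or
   the called address.  Tablejump bodies and asm jumps are not hinted
   and yield 0.  */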
2417 get_branch_target (rtx branch)
2419 if (GET_CODE (branch) == JUMP_INSN)
2423 /* Return statements */
2424 if (GET_CODE (PATTERN (branch)) == RETURN)
2425 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2428 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2429 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2433 if (extract_asm_operands (PATTERN (branch)) != NULL)
2436 set = single_set (branch);
2437 src = SET_SRC (set);
2438 if (GET_CODE (SET_DEST (set)) != PC)
2441 if (GET_CODE (src) == IF_THEN_ELSE)
2444 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2447 /* If the more probable case is not a fall through, then
2448 try a branch hint. */
2449 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2450 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2451 && GET_CODE (XEXP (src, 1)) != PC)
2452 lab = XEXP (src, 1);
2453 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2454 && GET_CODE (XEXP (src, 2)) != PC)
2455 lab = XEXP (src, 2);
2459 if (GET_CODE (lab) == RETURN)
2460 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2468 else if (GET_CODE (branch) == CALL_INSN)
2471 /* All of our call patterns are in a PARALLEL and the CALL is
2472 the first pattern in the PARALLEL. */
2473 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2475 call = XVECEXP (PATTERN (branch), 0, 0);
2476 if (GET_CODE (call) == SET)
2477 call = SET_SRC (call);
2478 if (GET_CODE (call) != CALL)
2480 return XEXP (XEXP (call, 0), 0);
2485 /* The special $hbr register is used to prevent the insn scheduler from
2486 moving hbr insns across instructions which invalidate them. It
2487 should only be used in a clobber, and this function searches for
2488 insns which clobber it. */
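/* For illustration, the kind of pattern searched for below looks like

       (parallel [(set ...) ... (clobber (reg:SI $hbr))])

   as emitted, e.g., by inline asm that lists $hbr among its clobbers.  */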
2490 insn_clobbers_hbr (rtx insn)
2493 && GET_CODE (PATTERN (insn)) == PARALLEL)
2495 rtx parallel = PATTERN (insn);
2498 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2500 clobber = XVECEXP (parallel, 0, j);
2501 if (GET_CODE (clobber) == CLOBBER
2502 && GET_CODE (XEXP (clobber, 0)) == REG
2503 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2510 /* Search up to 32 insns starting at FIRST:
2511 - at any kind of hinted branch, just return
2512 - at any unconditional branch in the first 15 insns, just return
2513 - at a call or indirect branch, after the first 15 insns, force it to
2514 an even address and return
2515 - at any unconditional branch, after the first 15 insns, force it to
2516 an even address.
2517 At the end of the search, insert an hbrp within 4 insns of FIRST,
2518 and an hbrp within 16 instructions of FIRST. */
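/* As a rough sketch of the two windows above (assuming 4-byte insns):
   before_4 ends up at the first insn whose end reaches byte 16 of the
   run (4 insns), and before_16 at the first insn whose end reaches
   byte 56 (14 insns, leaving room for the up-to-2 insns the first hbrp
   can add), so the two hbrps land within 4 and 16 insns of FIRST.  */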
2521 insert_hbrp_for_ilb_runout (rtx first)
2523 rtx insn, before_4 = 0, before_16 = 0;
2524 int addr = 0, length, first_addr = -1;
2525 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2526 int insert_lnop_after = 0;
2527 for (insn = first; insn; insn = NEXT_INSN (insn))
2530 if (first_addr == -1)
2531 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2532 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2533 length = get_attr_length (insn);
2535 if (before_4 == 0 && addr + length >= 4 * 4)
2537 /* We test for 14 instructions because the first hbrp will add
2538 up to 2 instructions. */
2539 if (before_16 == 0 && addr + length >= 14 * 4)
2542 if (INSN_CODE (insn) == CODE_FOR_hbr)
2544 /* Make sure an hbrp is at least 2 cycles away from a hint.
2545 Insert an lnop after the hbrp when necessary. */
2546 if (before_4 == 0 && addr > 0)
2549 insert_lnop_after |= 1;
2551 else if (before_4 && addr <= 4 * 4)
2552 insert_lnop_after |= 1;
2553 if (before_16 == 0 && addr > 10 * 4)
2556 insert_lnop_after |= 2;
2558 else if (before_16 && addr <= 14 * 4)
2559 insert_lnop_after |= 2;
2562 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2564 if (addr < hbrp_addr0)
2566 else if (addr < hbrp_addr1)
2570 if (CALL_P (insn) || JUMP_P (insn))
2572 if (HINTED_P (insn))
2575 /* Any branch after the first 15 insns should be on an even
2576 address to avoid a special case branch. There might be
2577 some nops and/or hbrps inserted, so we test after 10 insns. */
2580 SCHED_ON_EVEN_P (insn) = 1;
2583 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2587 if (addr + length >= 32 * 4)
2589 gcc_assert (before_4 && before_16);
2590 if (hbrp_addr0 > 4 * 4)
2593 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2594 recog_memoized (insn);
2595 INSN_ADDRESSES_NEW (insn,
2596 INSN_ADDRESSES (INSN_UID (before_4)));
2597 PUT_MODE (insn, GET_MODE (before_4));
2598 PUT_MODE (before_4, TImode);
2599 if (insert_lnop_after & 1)
2601 insn = emit_insn_before (gen_lnop (), before_4);
2602 recog_memoized (insn);
2603 INSN_ADDRESSES_NEW (insn,
2604 INSN_ADDRESSES (INSN_UID (before_4)));
2605 PUT_MODE (insn, TImode);
2608 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2609 && hbrp_addr1 > 16 * 4)
2612 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2613 recog_memoized (insn);
2614 INSN_ADDRESSES_NEW (insn,
2615 INSN_ADDRESSES (INSN_UID (before_16)));
2616 PUT_MODE (insn, GET_MODE (before_16));
2617 PUT_MODE (before_16, TImode);
2618 if (insert_lnop_after & 2)
2620 insn = emit_insn_before (gen_lnop (), before_16);
2621 recog_memoized (insn);
2622 INSN_ADDRESSES_NEW (insn,
2623 INSN_ADDRESSES (INSN_UID
2624 (before_16)));
2625 PUT_MODE (insn, TImode);
2631 else if (BARRIER_P (insn))
2636 /* The SPU might hang when it executes 48 inline instructions after a
2637 hinted branch jumps to its hinted target. The beginning of a
2638 function and the return from a call might have been hinted, and must
2639 be handled as well. To prevent a hang we insert 2 hbrps. The first
2640 should be within 6 insns of the branch target. The second should be
2641 within 22 insns of the branch target. When determining if hbrps are
2642 necessary, we look for only 32 inline instructions, because up to
2643 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2644 new hbrps, we insert them within 4 and 16 insns of the target. */
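/* As a rough accounting of the numbers above: 32 examined insns plus
   up to 12 nops and 4 hbrps gives the 48-instruction window, and
   inserting within 4 and 16 insns keeps a margin of 2 and 6 insns
   below the 6- and 22-insn limits.  */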
2649 if (TARGET_SAFE_HINTS)
2651 shorten_branches (get_insns ());
2652 /* Insert hbrp at beginning of function */
2653 insn = next_active_insn (get_insns ());
2655 insert_hbrp_for_ilb_runout (insn);
2656 /* Insert hbrp after hinted targets. */
2657 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2658 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2659 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2663 static int in_spu_reorg;
2665 /* Insert branch hints. There are no branch optimizations after this
2666 pass, so it's safe to set our branch hints now. */
2668 spu_machine_dependent_reorg (void)
2673 rtx branch_target = 0;
2674 int branch_addr = 0, insn_addr, required_dist = 0;
2678 if (!TARGET_BRANCH_HINTS || optimize == 0)
2680 /* We still do it for unoptimized code because an external
2681 function might have hinted a call or return. */
2687 blocks = sbitmap_alloc (last_basic_block);
2688 sbitmap_zero (blocks);
2691 compute_bb_for_insn ();
2696 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2697 sizeof (struct spu_bb_info));
2699 /* We need exact insn addresses and lengths. */
2700 shorten_branches (get_insns ());
2702 for (i = n_basic_blocks - 1; i >= 0; i--)
2704 bb = BASIC_BLOCK (i);
2706 if (spu_bb_info[i].prop_jump)
2708 branch = spu_bb_info[i].prop_jump;
2709 branch_target = get_branch_target (branch);
2710 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2711 required_dist = spu_hint_dist;
2713 /* Search from end of a block to beginning. In this loop, find
2714 jumps which need a branch hint and emit the hint only when:
2715 - it's an indirect branch and we're at the insn which sets
2716 the register
2717 - we're at an insn that will invalidate the hint. e.g., a
2718 call, another hint insn, inline asm that clobbers $hbr, and
2719 some inlined operations (divmodsi4). Don't consider jumps
2720 because they are only at the end of a block and are
2721 considered when we are deciding whether to propagate
2722 - we're getting too far away from the branch. The hbr insns
2723 only have a signed 10 bit offset
2724 We go back as far as possible so the branch will be considered
2725 for propagation when we get to the beginning of the block. */
2726 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2730 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2732 && ((GET_CODE (branch_target) == REG
2733 && set_of (branch_target, insn) != NULL_RTX)
2734 || insn_clobbers_hbr (insn)
2735 || branch_addr - insn_addr > 600))
2737 rtx next = NEXT_INSN (insn);
2738 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2739 if (insn != BB_END (bb)
2740 && branch_addr - next_addr >= required_dist)
2744 "hint for %i in block %i before %i\n",
2745 INSN_UID (branch), bb->index,
2747 spu_emit_branch_hint (next, branch, branch_target,
2748 branch_addr - next_addr, blocks);
2753 /* JUMP_P will only be true at the end of a block. When
2754 branch is already set it means we've previously decided
2755 to propagate a hint for that branch into this block. */
2756 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2759 if ((branch_target = get_branch_target (insn)))
2762 branch_addr = insn_addr;
2763 required_dist = spu_hint_dist;
2767 if (insn == BB_HEAD (bb))
2773 /* If we haven't emitted a hint for this branch yet, it might
2774 be profitable to emit it in one of the predecessor blocks,
2775 especially for loops. */
2777 basic_block prev = 0, prop = 0, prev2 = 0;
2778 int loop_exit = 0, simple_loop = 0;
2779 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2781 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2782 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2783 prev = EDGE_PRED (bb, j)->src;
2785 prev2 = EDGE_PRED (bb, j)->src;
2787 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2788 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2790 else if (EDGE_SUCC (bb, j)->dest == bb)
2793 /* If this branch is a loop exit then propagate to previous
2794 fallthru block. This catches the cases when it is a simple
2795 loop or when there is an initial branch into the loop. */
2796 if (prev && (loop_exit || simple_loop)
2797 && prev->loop_depth <= bb->loop_depth)
2800 /* If there is only one adjacent predecessor, propagate to it, but
2801 don't propagate outside this loop. This loop_depth test isn't perfect,
2802 but I'm not sure the loop_father member is valid at this point. */
2803 else if (prev && single_pred_p (bb)
2804 && prev->loop_depth == bb->loop_depth)
2807 /* If this is the JOIN block of a simple IF-THEN then
2808 propagate the hint to the HEADER block. */
2809 else if (prev && prev2
2810 && EDGE_COUNT (bb->preds) == 2
2811 && EDGE_COUNT (prev->preds) == 1
2812 && EDGE_PRED (prev, 0)->src == prev2
2813 && prev2->loop_depth == bb->loop_depth
2814 && GET_CODE (branch_target) != REG)
2817 /* Don't propagate when:
2818 - this is a simple loop and the hint would be too far
2819 - this is not a simple loop and there are 16 insns in
2820 this block already
2821 - the predecessor block ends in a branch that will be
2822 hinted
2823 - the predecessor block ends in an insn that invalidates
2824 the hint */
2827 && (bbend = BB_END (prop))
2828 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2829 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2830 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2833 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2834 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2835 bb->index, prop->index, bb->loop_depth,
2836 INSN_UID (branch), loop_exit, simple_loop,
2837 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2839 spu_bb_info[prop->index].prop_jump = branch;
2840 spu_bb_info[prop->index].bb_index = i;
2842 else if (branch_addr - next_addr >= required_dist)
2845 fprintf (dump_file, "hint for %i in block %i before %i\n",
2846 INSN_UID (branch), bb->index,
2847 INSN_UID (NEXT_INSN (insn)));
2848 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2849 branch_addr - next_addr, blocks);
2856 if (!sbitmap_empty_p (blocks))
2857 find_many_sub_basic_blocks (blocks);
2859 /* We have to schedule to make sure alignment is ok. */
2860 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2862 /* The hints need to be scheduled, so call it again. */
2869 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2870 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2872 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2873 between its branch label and the branch. We don't move the
2874 label because GCC expects it at the beginning of the block. */
2875 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2876 rtx label_ref = XVECEXP (unspec, 0, 0);
2877 rtx label = XEXP (label_ref, 0);
2880 for (branch = NEXT_INSN (label);
2881 !JUMP_P (branch) && !CALL_P (branch);
2882 branch = NEXT_INSN (branch))
2883 if (NONJUMP_INSN_P (branch))
2884 offset += get_attr_length (branch);
2886 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2889 if (spu_flag_var_tracking)
2892 timevar_push (TV_VAR_TRACKING);
2893 variable_tracking_main ();
2894 timevar_pop (TV_VAR_TRACKING);
2895 df_finish_pass (false);
2898 free_bb_for_insn ();
2904 /* Insn scheduling routines, primarily for dual issue. */
2906 spu_sched_issue_rate (void)
2912 uses_ls_unit(rtx insn)
2914 rtx set = single_set (insn);
2916 && (GET_CODE (SET_DEST (set)) == MEM
2917 || GET_CODE (SET_SRC (set)) == MEM))
2926 /* Handle inline asm */
2927 if (INSN_CODE (insn) == -1)
2929 t = get_attr_type (insn);
2954 case TYPE_IPREFETCH:
2962 /* haifa-sched.c has a static variable that keeps track of the current
2963 cycle. It is passed to spu_sched_reorder, and we record it here for
2964 use by spu_sched_variable_issue. It won't be accurate if the
2965 scheduler updates its clock_var between the two calls. */
2966 static int clock_var;
2968 /* This is used to keep track of insn alignment. Set to 0 at the
2969 beginning of each block and increased by the "length" attr of each
2970 insn scheduled. */
2971 static int spu_sched_length;
2973 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2974 ready list appropriately in spu_sched_reorder(). */
2975 static int pipe0_clock;
2976 static int pipe1_clock;
2978 static int prev_clock_var;
2980 static int prev_priority;
2982 /* The SPU needs to load the next ilb sometime during the execution of
2983 the previous ilb. There is a potential conflict if every cycle has a
2984 load or store. To avoid the conflict we make sure the load/store
2985 unit is free for at least one cycle during the execution of insns in
2986 the previous ilb. */
2987 static int spu_ls_first;
2988 static int prev_ls_clock;
2991 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2992 int max_ready ATTRIBUTE_UNUSED)
2994 spu_sched_length = 0;
2998 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2999 int max_ready ATTRIBUTE_UNUSED)
3001 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3003 /* When any block might be at least 8-byte aligned, assume they
3004 will all be at least 8-byte aligned to make sure dual issue
3005 works out correctly. */
3006 spu_sched_length = 0;
3008 spu_ls_first = INT_MAX;
3013 prev_clock_var = -1;
3018 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3019 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3023 if (GET_CODE (PATTERN (insn)) == USE
3024 || GET_CODE (PATTERN (insn)) == CLOBBER
3025 || (len = get_attr_length (insn)) == 0)
3028 spu_sched_length += len;
3030 /* Reset on inline asm */
3031 if (INSN_CODE (insn) == -1)
3033 spu_ls_first = INT_MAX;
3038 p = get_pipe (insn);
3040 pipe0_clock = clock_var;
3042 pipe1_clock = clock_var;
3046 if (clock_var - prev_ls_clock > 1
3047 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3048 spu_ls_first = INT_MAX;
3049 if (uses_ls_unit (insn))
3051 if (spu_ls_first == INT_MAX)
3052 spu_ls_first = spu_sched_length;
3053 prev_ls_clock = clock_var;
3056 /* The scheduler hasn't inserted the nop, but we will later on.
3057 Include those nops in spu_sched_length. */
3058 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3059 spu_sched_length += 4;
3060 prev_clock_var = clock_var;
3062 /* more is -1 when called from spu_sched_reorder for new insns
3063 that don't have INSN_PRIORITY */
3065 prev_priority = INSN_PRIORITY (insn);
3068 /* Always try issuing more insns. spu_sched_reorder will decide
3069 when the cycle should be advanced. */
3073 /* This function is called for both TARGET_SCHED_REORDER and
3074 TARGET_SCHED_REORDER2. */
3076 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3077 rtx *ready, int *nreadyp, int clock)
3079 int i, nready = *nreadyp;
3080 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3085 if (nready <= 0 || pipe1_clock >= clock)
3088 /* Find any rtl insns that don't generate assembly insns and schedule
3089 them first. */
3090 for (i = nready - 1; i >= 0; i--)
3093 if (INSN_CODE (insn) == -1
3094 || INSN_CODE (insn) == CODE_FOR_blockage
3095 || (INSN_P (insn) && get_attr_length (insn) == 0))
3097 ready[i] = ready[nready - 1];
3098 ready[nready - 1] = insn;
3103 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3104 for (i = 0; i < nready; i++)
3105 if (INSN_CODE (ready[i]) != -1)
3108 switch (get_attr_type (insn))
3133 case TYPE_IPREFETCH:
3139 /* In the first scheduling phase, schedule loads and stores together
3140 to increase the chance they will get merged during postreload CSE. */
3141 if (!reload_completed && pipe_ls >= 0)
3143 insn = ready[pipe_ls];
3144 ready[pipe_ls] = ready[nready - 1];
3145 ready[nready - 1] = insn;
3149 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3153 /* When we have loads/stores in every cycle of the last 15 insns and
3154 we are about to schedule another load/store, emit an hbrp insn instead. */
3157 && spu_sched_length - spu_ls_first >= 4 * 15
3158 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3160 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3161 recog_memoized (insn);
3162 if (pipe0_clock < clock)
3163 PUT_MODE (insn, TImode);
3164 spu_sched_variable_issue (file, verbose, insn, -1);
3168 /* In general, we want to emit nops to increase dual issue, but dual
3169 issue isn't faster when one of the insns could be scheduled later
3170 without affecting the critical path. We look at INSN_PRIORITY to
3171 make a good guess, but it isn't perfect so -mdual-nops=n can be
3172 used to affect it. */
3173 if (in_spu_reorg && spu_dual_nops < 10)
3175 /* When we are at an even address and we are not issuing nops to
3176 improve scheduling then we need to advance the cycle. */
3177 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3178 && (spu_dual_nops == 0
3181 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3184 /* When at an odd address, schedule the highest priority insn
3185 without considering pipeline. */
3186 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3187 && (spu_dual_nops == 0
3189 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3194 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3195 pipe0 insn in the ready list, schedule it. */
3196 if (pipe0_clock < clock && pipe_0 >= 0)
3197 schedule_i = pipe_0;
3199 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3200 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3202 schedule_i = pipe_1;
3204 if (schedule_i > -1)
3206 insn = ready[schedule_i];
3207 ready[schedule_i] = ready[nready - 1];
3208 ready[nready - 1] = insn;
3214 /* INSN is dependent on DEP_INSN. */
3216 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3220 /* The blockage pattern is used to prevent instructions from being
3221 moved across it and has no cost. */
3222 if (INSN_CODE (insn) == CODE_FOR_blockage
3223 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3226 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3227 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3230 /* Make sure hbrps are spread out. */
3231 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3232 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3235 /* Make sure hints and hbrps are 2 cycles apart. */
3236 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3237 || INSN_CODE (insn) == CODE_FOR_hbr)
3238 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3239 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3242 /* An hbrp has no real dependency on other insns. */
3243 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3244 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3247 /* Assuming that it is unlikely an argument register will be used in
3248 the first cycle of the called function, we reduce the cost for
3249 slightly better scheduling of dep_insn. When not hinted, the
3250 mispredicted branch would hide the cost as well. */
3253 rtx target = get_branch_target (insn);
3254 if (GET_CODE (target) != REG || !set_of (target, insn))
3259 /* And when returning from a function, let's assume the return values
3260 are completed sooner too. */
3261 if (CALL_P (dep_insn))
3264 /* Make sure an instruction that loads from the back chain is scheduled
3265 away from the return instruction so a hint is more likely to get
3266 issued. */
3267 if (INSN_CODE (insn) == CODE_FOR__return
3268 && (set = single_set (dep_insn))
3269 && GET_CODE (SET_DEST (set)) == REG
3270 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3273 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3274 scheduler makes every insn in a block anti-dependent on the final
3275 jump_insn. We adjust here so higher cost insns will get scheduled
3276 earlier. */
3277 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3278 return insn_cost (dep_insn) - 3;
3283 /* Create a CONST_DOUBLE from a string. */
3285 spu_float_const (const char *string, enum machine_mode mode)
3287 REAL_VALUE_TYPE value;
3288 value = REAL_VALUE_ATOF (string, mode);
3289 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3293 spu_constant_address_p (rtx x)
3295 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3296 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3297 || GET_CODE (x) == HIGH);
3300 static enum spu_immediate
3301 which_immediate_load (HOST_WIDE_INT val)
3303 gcc_assert (val == trunc_int_for_mode (val, SImode));
3305 if (val >= -0x8000 && val <= 0x7fff)
3307 if (val >= 0 && val <= 0x3ffff)
3309 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3311 if ((val & 0xffff) == 0)
3317 /* Return true when OP can be loaded by one of the il instructions, or
3318 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3320 immediate_load_p (rtx op, enum machine_mode mode)
3322 if (CONSTANT_P (op))
3324 enum immediate_class c = classify_immediate (op, mode);
3325 return c == IC_IL1 || c == IC_IL1s
3326 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3331 /* Return true if the first SIZE bytes of ARR form a constant that can be
3332 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3333 represent the size and offset of the instruction to use. */
3335 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3337 int cpat, run, i, start;
3341 for (i = 0; i < size && cpat; i++)
3349 else if (arr[i] == 2 && arr[i+1] == 3)
3351 else if (arr[i] == 0)
3353 while (arr[i+run] == run && i+run < 16)
3355 if (run != 4 && run != 8)
3360 if ((i & (run-1)) != 0)
3367 if (cpat && (run || size < 16))
3374 *pstart = start == -1 ? 16-run : start;
3380 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3381 it into a register. MODE is only valid when OP is a CONST_INT. */
3382 static enum immediate_class
3383 classify_immediate (rtx op, enum machine_mode mode)
3386 unsigned char arr[16];
3387 int i, j, repeated, fsmbi, repeat;
3389 gcc_assert (CONSTANT_P (op));
3391 if (GET_MODE (op) != VOIDmode)
3392 mode = GET_MODE (op);
3394 /* A V4SI const_vector with all identical symbols is ok. */
3397 && GET_CODE (op) == CONST_VECTOR
3398 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3399 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3400 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3401 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3402 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3403 op = CONST_VECTOR_ELT (op, 0);
3405 switch (GET_CODE (op))
3409 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3412 /* We can never know if the resulting address fits in 18 bits and can be
3413 loaded with ila. For now, assume the address will not overflow if
3414 the displacement is "small" (fits 'K' constraint). */
3415 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3417 rtx sym = XEXP (XEXP (op, 0), 0);
3418 rtx cst = XEXP (XEXP (op, 0), 1);
3420 if (GET_CODE (sym) == SYMBOL_REF
3421 && GET_CODE (cst) == CONST_INT
3422 && satisfies_constraint_K (cst))
3431 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3432 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3433 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3439 constant_to_array (mode, op, arr);
3441 /* Check that each 4-byte slot is identical. */
3443 for (i = 4; i < 16; i += 4)
3444 for (j = 0; j < 4; j++)
3445 if (arr[j] != arr[i + j])
3450 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3451 val = trunc_int_for_mode (val, SImode);
3453 if (which_immediate_load (val) != SPU_NONE)
3457 /* Any mode of 2 bytes or smaller can be loaded with an il instruction. */
3459 gcc_assert (GET_MODE_SIZE (mode) > 2);
3463 for (i = 0; i < 16 && fsmbi; i++)
3464 if (arr[i] != 0 && repeat == 0)
3466 else if (arr[i] != 0 && arr[i] != repeat)
3469 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3471 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3484 static enum spu_immediate
3485 which_logical_immediate (HOST_WIDE_INT val)
3487 gcc_assert (val == trunc_int_for_mode (val, SImode));
3489 if (val >= -0x200 && val <= 0x1ff)
3491 if (val >= 0 && val <= 0xffff)
3493 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3495 val = trunc_int_for_mode (val, HImode);
3496 if (val >= -0x200 && val <= 0x1ff)
3498 if ((val & 0xff) == ((val >> 8) & 0xff))
3500 val = trunc_int_for_mode (val, QImode);
3501 if (val >= -0x200 && val <= 0x1ff)
3508 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or CONST_DOUBLEs. */
3511 const_vector_immediate_p (rtx x)
3514 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3515 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3516 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3517 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3523 logical_immediate_p (rtx op, enum machine_mode mode)
3526 unsigned char arr[16];
3529 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3530 || GET_CODE (op) == CONST_VECTOR);
3532 if (GET_CODE (op) == CONST_VECTOR
3533 && !const_vector_immediate_p (op))
3536 if (GET_MODE (op) != VOIDmode)
3537 mode = GET_MODE (op);
3539 constant_to_array (mode, op, arr);
3541 /* Check that bytes are repeated. */
3542 for (i = 4; i < 16; i += 4)
3543 for (j = 0; j < 4; j++)
3544 if (arr[j] != arr[i + j])
3547 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3548 val = trunc_int_for_mode (val, SImode);
3550 i = which_logical_immediate (val);
3551 return i != SPU_NONE && i != SPU_IOHL;
3555 iohl_immediate_p (rtx op, enum machine_mode mode)
3558 unsigned char arr[16];
3561 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3562 || GET_CODE (op) == CONST_VECTOR);
3564 if (GET_CODE (op) == CONST_VECTOR
3565 && !const_vector_immediate_p (op))
3568 if (GET_MODE (op) != VOIDmode)
3569 mode = GET_MODE (op);
3571 constant_to_array (mode, op, arr);
3573 /* Check that bytes are repeated. */
3574 for (i = 4; i < 16; i += 4)
3575 for (j = 0; j < 4; j++)
3576 if (arr[j] != arr[i + j])
3579 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3580 val = trunc_int_for_mode (val, SImode);
3582 return val >= 0 && val <= 0xffff;
3586 arith_immediate_p (rtx op, enum machine_mode mode,
3587 HOST_WIDE_INT low, HOST_WIDE_INT high)
3590 unsigned char arr[16];
3593 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3594 || GET_CODE (op) == CONST_VECTOR);
3596 if (GET_CODE (op) == CONST_VECTOR
3597 && !const_vector_immediate_p (op))
3600 if (GET_MODE (op) != VOIDmode)
3601 mode = GET_MODE (op);
3603 constant_to_array (mode, op, arr);
3605 if (VECTOR_MODE_P (mode))
3606 mode = GET_MODE_INNER (mode);
3608 bytes = GET_MODE_SIZE (mode);
3609 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3611 /* Check that bytes are repeated. */
3612 for (i = bytes; i < 16; i += bytes)
3613 for (j = 0; j < bytes; j++)
3614 if (arr[j] != arr[i + j])
3618 for (j = 1; j < bytes; j++)
3619 val = (val << 8) | arr[j];
3621 val = trunc_int_for_mode (val, mode);
3623 return val >= low && val <= high;
3626 /* TRUE when op is an immediate and an exact power of 2, and given that
3627 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3628 all entries must be the same. */
3630 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3632 enum machine_mode int_mode;
3634 unsigned char arr[16];
3637 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3638 || GET_CODE (op) == CONST_VECTOR);
3640 if (GET_CODE (op) == CONST_VECTOR
3641 && !const_vector_immediate_p (op))
3644 if (GET_MODE (op) != VOIDmode)
3645 mode = GET_MODE (op);
3647 constant_to_array (mode, op, arr);
3649 if (VECTOR_MODE_P (mode))
3650 mode = GET_MODE_INNER (mode);
3652 bytes = GET_MODE_SIZE (mode);
3653 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3655 /* Check that bytes are repeated. */
3656 for (i = bytes; i < 16; i += bytes)
3657 for (j = 0; j < bytes; j++)
3658 if (arr[j] != arr[i + j])
3662 for (j = 1; j < bytes; j++)
3663 val = (val << 8) | arr[j];
3665 val = trunc_int_for_mode (val, int_mode);
3667 /* Currently, we only handle SFmode */
3668 gcc_assert (mode == SFmode);
3671 int exp = (val >> 23) - 127;
3672 return val > 0 && (val & 0x007fffff) == 0
3673 && exp >= low && exp <= high;
3678 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3681 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3686 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3688 rtx plus = XEXP (x, 0);
3689 rtx op0 = XEXP (plus, 0);
3690 rtx op1 = XEXP (plus, 1);
3691 if (GET_CODE (op1) == CONST_INT)
3695 return (GET_CODE (x) == SYMBOL_REF
3696 && (decl = SYMBOL_REF_DECL (x)) != 0
3697 && TREE_CODE (decl) == VAR_DECL
3698 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3701 /* We accept:
3702 - any 32-bit constant (SImode, SFmode)
3703 - any constant that can be generated with fsmbi (any mode)
3704 - a 64-bit constant where the high and low bits are identical
3705 (DImode, DFmode)
3706 - a 128-bit constant where the four 32-bit words match. */
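/* Some illustrative examples of constants accepted here (not an
   exhaustive list):
     SImode  0x12345678                 any 32-bit value
     V16QI   bytes all 0x00 or 0xff     loadable with fsmbi
     DImode  0x1234567812345678         high and low words identical
     V4SI    { x, x, x, x }             all four words identical  */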
3708 spu_legitimate_constant_p (rtx x)
3710 if (GET_CODE (x) == HIGH)
3713 /* Reject any __ea qualified reference. These can't appear in
3714 instructions but must be forced to the constant pool. */
3715 if (for_each_rtx (&x, ea_symbol_ref, 0))
3718 /* V4SI with all identical symbols is valid. */
3720 && GET_MODE (x) == V4SImode
3721 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3722 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3723 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3724 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3725 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3726 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3728 if (GET_CODE (x) == CONST_VECTOR
3729 && !const_vector_immediate_p (x))
3734 /* Valid addresses are:
3735 - symbol_ref, label_ref, const
3736 - reg
3737 - reg + const_int, where const_int is 16 byte aligned
3738 - reg + reg, alignment doesn't matter
3739 The alignment matters in the reg+const case because lqd and stqd
3740 ignore the 4 least significant bits of the const. We only care about
3741 16 byte modes because the expand phase will change all smaller MEM
3742 references to TImode. */
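/* For illustration: with a 16-byte access the low 4 bits of the
   constant are dropped, so an address of reg + 20 would really access
   reg + 16; that is why the reg + const_int form is only accepted when
   the constant is 16-byte aligned.  */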
3744 spu_legitimate_address_p (enum machine_mode mode,
3745 rtx x, bool reg_ok_strict)
3747 int aligned = GET_MODE_SIZE (mode) >= 16;
3749 && GET_CODE (x) == AND
3750 && GET_CODE (XEXP (x, 1)) == CONST_INT
3751 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3753 switch (GET_CODE (x))
3756 return !TARGET_LARGE_MEM;
3760 /* Keep __ea references until reload so that spu_expand_mov can see them
3761 in MEMs. */
3762 if (ea_symbol_ref (&x, 0))
3763 return !reload_in_progress && !reload_completed;
3764 return !TARGET_LARGE_MEM;
3767 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3775 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3780 rtx op0 = XEXP (x, 0);
3781 rtx op1 = XEXP (x, 1);
3782 if (GET_CODE (op0) == SUBREG)
3783 op0 = XEXP (op0, 0);
3784 if (GET_CODE (op1) == SUBREG)
3785 op1 = XEXP (op1, 0);
3786 if (GET_CODE (op0) == REG
3787 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3788 && GET_CODE (op1) == CONST_INT
3789 && INTVAL (op1) >= -0x2000
3790 && INTVAL (op1) <= 0x1fff
3791 && (!aligned || (INTVAL (op1) & 15) == 0))
3793 if (GET_CODE (op0) == REG
3794 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3795 && GET_CODE (op1) == REG
3796 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3807 /* Like spu_legitimate_address_p, except with named addresses. */
3809 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3810 bool reg_ok_strict, addr_space_t as)
3812 if (as == ADDR_SPACE_EA)
3813 return (REG_P (x) && (GET_MODE (x) == EAmode));
3815 else if (as != ADDR_SPACE_GENERIC)
3818 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3821 /* When the address is reg + const_int, force the const_int into a register. */
3824 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3825 enum machine_mode mode ATTRIBUTE_UNUSED)
3828 /* Make sure both operands are registers. */
3829 if (GET_CODE (x) == PLUS)
3833 if (ALIGNED_SYMBOL_REF_P (op0))
3835 op0 = force_reg (Pmode, op0);
3836 mark_reg_pointer (op0, 128);
3838 else if (GET_CODE (op0) != REG)
3839 op0 = force_reg (Pmode, op0);
3840 if (ALIGNED_SYMBOL_REF_P (op1))
3842 op1 = force_reg (Pmode, op1);
3843 mark_reg_pointer (op1, 128);
3845 else if (GET_CODE (op1) != REG)
3846 op1 = force_reg (Pmode, op1);
3847 x = gen_rtx_PLUS (Pmode, op0, op1);
3852 /* Like spu_legitimate_address, except with named address support. */
3854 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3857 if (as != ADDR_SPACE_GENERIC)
3860 return spu_legitimize_address (x, oldx, mode);
3863 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3864 struct attribute_spec.handler. */
3866 spu_handle_fndecl_attribute (tree * node,
3868 tree args ATTRIBUTE_UNUSED,
3869 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3871 if (TREE_CODE (*node) != FUNCTION_DECL)
3873 warning (0, "%qE attribute only applies to functions",
3875 *no_add_attrs = true;
3881 /* Handle the "vector" attribute. */
3883 spu_handle_vector_attribute (tree * node, tree name,
3884 tree args ATTRIBUTE_UNUSED,
3885 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3887 tree type = *node, result = NULL_TREE;
3888 enum machine_mode mode;
3891 while (POINTER_TYPE_P (type)
3892 || TREE_CODE (type) == FUNCTION_TYPE
3893 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3894 type = TREE_TYPE (type);
3896 mode = TYPE_MODE (type);
3898 unsigned_p = TYPE_UNSIGNED (type);
3902 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3905 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3908 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3911 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3914 result = V4SF_type_node;
3917 result = V2DF_type_node;
3923 /* Propagate qualifiers attached to the element type
3924 onto the vector type. */
3925 if (result && result != type && TYPE_QUALS (type))
3926 result = build_qualified_type (result, TYPE_QUALS (type));
3928 *no_add_attrs = true; /* No need to hang on to the attribute. */
3931 warning (0, "%qE attribute ignored", name);
3933 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3938 /* Return nonzero if FUNC is a naked function. */
3940 spu_naked_function_p (tree func)
3944 if (TREE_CODE (func) != FUNCTION_DECL)
3947 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3948 return a != NULL_TREE;
3952 spu_initial_elimination_offset (int from, int to)
3954 int saved_regs_size = spu_saved_regs_size ();
3956 if (!current_function_is_leaf || crtl->outgoing_args_size
3957 || get_frame_size () || saved_regs_size)
3958 sp_offset = STACK_POINTER_OFFSET;
3959 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3960 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3961 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3962 return get_frame_size ();
3963 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3964 return sp_offset + crtl->outgoing_args_size
3965 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3966 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3967 return get_frame_size () + saved_regs_size + sp_offset;
3973 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3975 enum machine_mode mode = TYPE_MODE (type);
3976 int byte_size = ((mode == BLKmode)
3977 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3979 /* Make sure small structs are left justified in a register. */
3980 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3981 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3983 enum machine_mode smode;
3986 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3987 int n = byte_size / UNITS_PER_WORD;
3988 v = rtvec_alloc (nregs);
3989 for (i = 0; i < n; i++)
3991 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3992 gen_rtx_REG (TImode,
3995 GEN_INT (UNITS_PER_WORD * i));
3996 byte_size -= UNITS_PER_WORD;
4004 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4006 gen_rtx_EXPR_LIST (VOIDmode,
4007 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4008 GEN_INT (UNITS_PER_WORD * n));
4010 return gen_rtx_PARALLEL (mode, v);
4012 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4016 spu_function_arg (CUMULATIVE_ARGS *cum,
4017 enum machine_mode mode,
4018 const_tree type, bool named ATTRIBUTE_UNUSED)
4022 if (cum >= MAX_REGISTER_ARGS)
4025 byte_size = ((mode == BLKmode)
4026 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4028 /* The ABI does not allow parameters to be passed partially in
4029 reg and partially in stack. */
4030 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4033 /* Make sure small structs are left justified in a register. */
4034 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4035 && byte_size < UNITS_PER_WORD && byte_size > 0)
4037 enum machine_mode smode;
4041 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4042 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4043 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4045 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4048 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4052 spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4053 const_tree type, bool named ATTRIBUTE_UNUSED)
4055 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4058 ? ((int_size_in_bytes (type) + 15) / 16)
4061 : HARD_REGNO_NREGS (cum, mode));
4064 /* Variable sized types are passed by reference. */
4066 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4067 enum machine_mode mode ATTRIBUTE_UNUSED,
4068 const_tree type, bool named ATTRIBUTE_UNUSED)
4070 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4076 /* Create and return the va_list datatype.
4078 On SPU, va_list is an array type equivalent to
4080 typedef struct __va_list_tag
4081 {
4082 void *__args __attribute__((__aligned(16)));
4083 void *__skip __attribute__((__aligned(16)));
4084 } va_list[1];
4087 where __args points to the arg that will be returned by the next
4088 va_arg(), and __skip points to the previous stack frame such that
4089 when __args == __skip we should advance __args by 32 bytes. */
4091 spu_build_builtin_va_list (void)
4093 tree f_args, f_skip, record, type_decl;
4096 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4099 build_decl (BUILTINS_LOCATION,
4100 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4102 f_args = build_decl (BUILTINS_LOCATION,
4103 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4104 f_skip = build_decl (BUILTINS_LOCATION,
4105 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4107 DECL_FIELD_CONTEXT (f_args) = record;
4108 DECL_ALIGN (f_args) = 128;
4109 DECL_USER_ALIGN (f_args) = 1;
4111 DECL_FIELD_CONTEXT (f_skip) = record;
4112 DECL_ALIGN (f_skip) = 128;
4113 DECL_USER_ALIGN (f_skip) = 1;
4115 TREE_CHAIN (record) = type_decl;
4116 TYPE_NAME (record) = type_decl;
4117 TYPE_FIELDS (record) = f_args;
4118 DECL_CHAIN (f_args) = f_skip;
4120 /* We know this is being padded and we want it that way. It is an internal
4121 type so hide the warnings from the user. */
4123 warn_padded = false;
4125 layout_type (record);
4129 /* The correct type is an array type of one element. */
4130 return build_array_type (record, build_index_type (size_zero_node));
4133 /* Implement va_start by filling the va_list structure VALIST.
4134 NEXTARG points to the first anonymous stack argument.
4136 The following global variables are used to initialize
4137 the va_list structure:
4140 the CUMULATIVE_ARGS for this function
4142 crtl->args.arg_offset_rtx:
4143 holds the offset of the first anonymous stack argument
4144 (relative to the virtual arg pointer). */
4147 spu_va_start (tree valist, rtx nextarg)
4149 tree f_args, f_skip;
4152 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4153 f_skip = DECL_CHAIN (f_args);
4155 valist = build_va_arg_indirect_ref (valist);
4157 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4159 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4161 /* Find the __args area. */
4162 t = make_tree (TREE_TYPE (args), nextarg);
4163 if (crtl->args.pretend_args_size > 0)
4164 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4165 size_int (-STACK_POINTER_OFFSET));
4166 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4167 TREE_SIDE_EFFECTS (t) = 1;
4168 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4170 /* Find the __skip area. */
4171 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4172 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4173 size_int (crtl->args.pretend_args_size
4174 - STACK_POINTER_OFFSET));
4175 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4176 TREE_SIDE_EFFECTS (t) = 1;
4177 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4180 /* Gimplify va_arg by updating the va_list structure
4181 VALIST as required to retrieve an argument of type
4182 TYPE, and returning that argument.
4184 ret = va_arg(VALIST, TYPE);
4186 generates code equivalent to:
4188 paddedsize = (sizeof(TYPE) + 15) & -16;
4189 if (VALIST.__args + paddedsize > VALIST.__skip
4190 && VALIST.__args <= VALIST.__skip)
4191 addr = VALIST.__skip + 32;
4193 addr = VALIST.__args;
4194 VALIST.__args = addr + paddedsize;
4195 ret = *(TYPE *)addr;
4196 */
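/* A rough walkthrough of the logic above: with __args == 0x100,
   __skip == 0x110 and a 32-byte TYPE, paddedsize is 32, so
   0x100 + 32 > 0x110 and 0x100 <= 0x110; the argument is read from
   __skip + 32 == 0x130 and __args becomes 0x150.  With a 16-byte TYPE
   the test fails and the argument is read from 0x100 as usual.  */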
4198 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4199 gimple_seq * post_p ATTRIBUTE_UNUSED)
4201 tree f_args, f_skip;
4203 HOST_WIDE_INT size, rsize;
4204 tree paddedsize, addr, tmp;
4205 bool pass_by_reference_p;
4207 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4208 f_skip = DECL_CHAIN (f_args);
4210 valist = build_simple_mem_ref (valist);
4212 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4214 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4216 addr = create_tmp_var (ptr_type_node, "va_arg");
4218 /* if an object is dynamically sized, a pointer to it is passed
4219 instead of the object itself. */
4220 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4222 if (pass_by_reference_p)
4223 type = build_pointer_type (type);
4224 size = int_size_in_bytes (type);
4225 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4227 /* build conditional expression to calculate addr. The expression
4228 will be gimplified later. */
4229 paddedsize = size_int (rsize);
4230 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4231 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4232 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4233 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4234 unshare_expr (skip)));
4236 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4237 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4238 size_int (32)), unshare_expr (args));
4240 gimplify_assign (addr, tmp, pre_p);
4242 /* update VALIST.__args */
4243 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4244 gimplify_assign (unshare_expr (args), tmp, pre_p);
4246 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4249 if (pass_by_reference_p)
4250 addr = build_va_arg_indirect_ref (addr);
4252 return build_va_arg_indirect_ref (addr);
4255 /* Save parameter registers starting with the register that corresponds
4256 to the first unnamed parameter. If the first unnamed parameter is
4257 in the stack then save no registers. Set pretend_args_size to the
4258 amount of space needed to save the registers. */
4260 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4261 tree type, int *pretend_size, int no_rtl)
4270 /* cum currently points to the last named argument; we want to
4271 start at the next argument. */
4272 spu_function_arg_advance (&ncum, mode, type, true);
4274 offset = -STACK_POINTER_OFFSET;
4275 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4277 tmp = gen_frame_mem (V4SImode,
4278 plus_constant (virtual_incoming_args_rtx,
4280 emit_move_insn (tmp,
4281 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4284 *pretend_size = offset + STACK_POINTER_OFFSET;
4289 spu_conditional_register_usage (void)
4293 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4294 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4298 /* This is called any time we inspect the alignment of a register for addresses. */
4301 reg_aligned_for_addr (rtx x)
4304 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4305 return REGNO_POINTER_ALIGN (regno) >= 128;
4308 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4309 into its SYMBOL_REF_FLAGS. */
4311 spu_encode_section_info (tree decl, rtx rtl, int first)
4313 default_encode_section_info (decl, rtl, first);
4315 /* If a variable has a forced alignment to < 16 bytes, mark it with
4316 SYMBOL_FLAG_ALIGN1. */
4317 if (TREE_CODE (decl) == VAR_DECL
4318 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4319 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4322 /* Return TRUE if we are certain the mem refers to a complete object
4323 which is both 16-byte aligned and padded to a 16-byte boundary. This
4324 would make it safe to store with a single instruction.
4325 We guarantee the alignment and padding for static objects by aligning
4326 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4327 FIXME: We currently cannot guarantee this for objects on the stack
4328 because assign_parm_setup_stack calls assign_stack_local with the
4329 alignment of the parameter mode and in that case the alignment never
4330 gets adjusted by LOCAL_ALIGNMENT. */
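/* For illustration: a quadword store rewrites all 16 bytes of its
   slot, so storing, say, an SImode value with a single store also
   rewrites the neighboring 12 bytes.  That is only safe when the whole
   slot belongs to the object, which is what the checks below try to
   establish.  */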
4332 store_with_one_insn_p (rtx mem)
4334 enum machine_mode mode = GET_MODE (mem);
4335 rtx addr = XEXP (mem, 0);
4336 if (mode == BLKmode)
4338 if (GET_MODE_SIZE (mode) >= 16)
4340 /* Only static objects. */
4341 if (GET_CODE (addr) == SYMBOL_REF)
4343 /* We use the associated declaration to make sure the access is
4344 referring to the whole object.
4345 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4346 if it is necessary. Will there be cases where one exists, and
4347 the other does not? Will there be cases where both exist, but
4348 have different types? */
4349 tree decl = MEM_EXPR (mem);
4351 && TREE_CODE (decl) == VAR_DECL
4352 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4354 decl = SYMBOL_REF_DECL (addr);
4356 && TREE_CODE (decl) == VAR_DECL
4357 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4363 /* Return 1 when the address is not valid for a simple load and store as
4364 required by the '_mov*' patterns. We could make this less strict
4365 for loads, but we prefer MEMs to look the same so they are more
4366 likely to be merged. */
4368 address_needs_split (rtx mem)
4370 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4371 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4372 || !(store_with_one_insn_p (mem)
4373 || mem_is_padded_component_ref (mem))))
4379 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4380 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4381 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4383 /* MEM is known to be an __ea qualified memory access. Emit a call to
4384 fetch the PPU memory to local store, and return its address in local store. */
4388 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4392 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4393 if (!cache_fetch_dirty)
4394 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4395 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4396 2, ea_addr, EAmode, ndirty, SImode);
4401 cache_fetch = init_one_libfunc ("__cache_fetch");
4402 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4403 1, ea_addr, EAmode);
4407 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4408 dirty bit marking, inline.
4410 The cache control data structure is an array of
4412 struct __cache_tag_array
4413 {
4414 unsigned int tag_lo[4];
4415 unsigned int tag_hi[4];
4416 void *data_pointer[4];
4417 int reserved[4];
4418 vector unsigned short dirty_bits[4];
4419 } */
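/* A rough sketch of the layout implied by the code below: each set is
   128 bytes,
       offset  0: tag_lo[4]
       offset 16: tag_hi[4]
       offset 32: data_pointer[4]
       offset 48: reserved[4]
       offset 64: dirty_bits[4]   (reached via eq_index * 4 = 0x40..0x70)
   so scaling eq_index by 4 lands on the dirty_bits element for the slot
   that hit.  */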
4422 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4426 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4427 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4428 rtx index_mask = gen_reg_rtx (SImode);
4429 rtx tag_arr = gen_reg_rtx (Pmode);
4430 rtx splat_mask = gen_reg_rtx (TImode);
4431 rtx splat = gen_reg_rtx (V4SImode);
4432 rtx splat_hi = NULL_RTX;
4433 rtx tag_index = gen_reg_rtx (Pmode);
4434 rtx block_off = gen_reg_rtx (SImode);
4435 rtx tag_addr = gen_reg_rtx (Pmode);
4436 rtx tag = gen_reg_rtx (V4SImode);
4437 rtx cache_tag = gen_reg_rtx (V4SImode);
4438 rtx cache_tag_hi = NULL_RTX;
4439 rtx cache_ptrs = gen_reg_rtx (TImode);
4440 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4441 rtx tag_equal = gen_reg_rtx (V4SImode);
4442 rtx tag_equal_hi = NULL_RTX;
4443 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4444 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4445 rtx eq_index = gen_reg_rtx (SImode);
4446 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4448 if (spu_ea_model != 32)
4450 splat_hi = gen_reg_rtx (V4SImode);
4451 cache_tag_hi = gen_reg_rtx (V4SImode);
4452 tag_equal_hi = gen_reg_rtx (V4SImode);
4455 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4456 emit_move_insn (tag_arr, tag_arr_sym);
4457 v = 0x0001020300010203LL;
4458 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4459 ea_addr_si = ea_addr;
4460 if (spu_ea_model != 32)
4461 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4463 /* tag_index = ea_addr & (tag_array_size - 128) */
4464 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4466 /* splat ea_addr to all 4 slots. */
4467 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4468 /* Similarly for high 32 bits of ea_addr. */
4469 if (spu_ea_model != 32)
4470 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4472 /* block_off = ea_addr & 127 */
4473 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4475 /* tag_addr = tag_arr + tag_index */
4476 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4478 /* Read cache tags. */
4479 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4480 if (spu_ea_model != 32)
4481 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4482 plus_constant (tag_addr, 16)));
4484 /* tag = ea_addr & -128 */
4485 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4487 /* Read all four cache data pointers. */
4488 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4489 plus_constant (tag_addr, 32)));
4492 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4493 if (spu_ea_model != 32)
4495 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4496 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4499 /* At most one of the tags compare equal, so tag_equal has one
4500 32-bit slot set to all 1's, with the other slots all zero.
4501 gbb picks off low bit from each byte in the 128-bit registers,
4502 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4503 we have a hit. */
4504 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4505 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4507 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4508 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4510 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4511 (rotating eq_index mod 16 bytes). */
4512 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4513 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4515 /* Add block offset to form final data address. */
4516 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4518 /* Check that we did hit. */
4519 hit_label = gen_label_rtx ();
4520 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4521 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4522 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4523 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4525 /* Say that this branch is very likely to happen. */
4526 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4527 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4529 ea_load_store (mem, is_store, ea_addr, data_addr);
4530 cont_label = gen_label_rtx ();
4531 emit_jump_insn (gen_jump (cont_label));
4534 emit_label (hit_label);
4539 rtx dirty_bits = gen_reg_rtx (TImode);
4540 rtx dirty_off = gen_reg_rtx (SImode);
4541 rtx dirty_128 = gen_reg_rtx (TImode);
4542 rtx neg_block_off = gen_reg_rtx (SImode);
4544 /* Set up mask with one dirty bit per byte of the mem we are
4545 writing, starting from top bit. */
4547 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4548 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4553 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4555 /* Form index into cache dirty_bits. eq_index is one of
4556 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4557 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4558 offset to each of the four dirty_bits elements. */
4559 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4561 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4563 /* Rotate bit mask to proper bit. */
4564 emit_insn (gen_negsi2 (neg_block_off, block_off));
4565 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4566 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4568 /* Or in the new dirty bits. */
4569 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4572 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4575 emit_label (cont_label);
4579 expand_ea_mem (rtx mem, bool is_store)
4582 rtx data_addr = gen_reg_rtx (Pmode);
4585 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4586 if (optimize_size || optimize == 0)
4587 ea_load_store (mem, is_store, ea_addr, data_addr);
4589 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4591 if (ea_alias_set == -1)
4592 ea_alias_set = new_alias_set ();
4594 /* We generate a new MEM RTX to refer to the copy of the data
4595 in the cache. We do not copy memory attributes (except the
4596 alignment) from the original MEM, as they may no longer apply
4597 to the cache copy. */
4598 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4599 set_mem_alias_set (new_mem, ea_alias_set);
4600 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4606 spu_expand_mov (rtx * ops, enum machine_mode mode)
4608 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4611 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4613 rtx from = SUBREG_REG (ops[1]);
4614 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4616 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4617 && GET_MODE_CLASS (imode) == MODE_INT
4618 && subreg_lowpart_p (ops[1]));
4620 if (GET_MODE_SIZE (imode) < 4)
4622 if (imode != GET_MODE (from))
4623 from = gen_rtx_SUBREG (imode, from, 0);
4625 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4627 enum insn_code icode = convert_optab_handler (trunc_optab,
4629 emit_insn (GEN_FCN (icode) (ops[0], from));
4632 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4636 /* At least one of the operands needs to be a register. */
4637 if ((reload_in_progress | reload_completed) == 0
4638 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4640 rtx temp = force_reg (mode, ops[1]);
4641 emit_move_insn (ops[0], temp);
4644 if (reload_in_progress || reload_completed)
4646 if (CONSTANT_P (ops[1]))
4647 return spu_split_immediate (ops);
4651 /* Catch the SImode immediates greater than 0x7fffffff, and sign extend them. */
4653 if (GET_CODE (ops[1]) == CONST_INT)
4655 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4656 if (val != INTVAL (ops[1]))
4658 emit_move_insn (ops[0], GEN_INT (val));
4664 if (MEM_ADDR_SPACE (ops[0]))
4665 ops[0] = expand_ea_mem (ops[0], true);
4666 return spu_split_store (ops);
4670 if (MEM_ADDR_SPACE (ops[1]))
4671 ops[1] = expand_ea_mem (ops[1], false);
4672 return spu_split_load (ops);
4679 spu_convert_move (rtx dst, rtx src)
4681 enum machine_mode mode = GET_MODE (dst);
4682 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4684 gcc_assert (GET_MODE (src) == TImode);
4685 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
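/* The value lives in the preferred slot (the first word of the
   quadword, or the first doubleword for DImode), so shift it down to
   the low end before truncating: 64 bits for DImode, 96 bits
   otherwise. */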
4686 emit_insn (gen_rtx_SET (VOIDmode, reg,
4687 gen_rtx_TRUNCATE (int_mode,
4688 gen_rtx_LSHIFTRT (TImode, src,
4689 GEN_INT (int_mode == DImode ? 64 : 96)))));
4690 if (int_mode != mode)
4692 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4693 emit_move_insn (dst, reg);
4697 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4698 the address from SRC and SRC+16. Return a REG or CONST_INT that
4699 specifies how many bytes to rotate the loaded registers, plus any
4700 extra from EXTRA_ROTQBY. The address and rotate amounts are
4701 normalized to improve merging of loads and rotate computations. */
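/* The local store only supports 16-byte aligned quadword accesses, so
   an unaligned load is performed by loading the enclosing quadword(s)
   and rotating the wanted bytes into the preferred slot; the value
   returned here is that byte rotate count. */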
4703 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4705 rtx addr = XEXP (src, 0);
4706 rtx p0, p1, rot, addr0, addr1;
4712 if (MEM_ALIGN (src) >= 128)
4713 /* Address is already aligned; simply perform a TImode load. */ ;
4714 else if (GET_CODE (addr) == PLUS)
4717 aligned reg + aligned reg => lqx
4718 aligned reg + unaligned reg => lqx, rotqby
4719 aligned reg + aligned const => lqd
4720 aligned reg + unaligned const => lqd, rotqbyi
4721 unaligned reg + aligned reg => lqx, rotqby
4722 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4723 unaligned reg + aligned const => lqd, rotqby
4724 unaligned reg + unaligned const -> not allowed by legitimate address
4726 p0 = XEXP (addr, 0);
4727 p1 = XEXP (addr, 1);
4728 if (!reg_aligned_for_addr (p0))
4730 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4732 rot = gen_reg_rtx (SImode);
4733 emit_insn (gen_addsi3 (rot, p0, p1));
4735 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4739 && INTVAL (p1) * BITS_PER_UNIT
4740 < REGNO_POINTER_ALIGN (REGNO (p0)))
4742 rot = gen_reg_rtx (SImode);
4743 emit_insn (gen_addsi3 (rot, p0, p1));
4748 rtx x = gen_reg_rtx (SImode);
4749 emit_move_insn (x, p1);
4750 if (!spu_arith_operand (p1, SImode))
4752 rot = gen_reg_rtx (SImode);
4753 emit_insn (gen_addsi3 (rot, p0, p1));
4754 addr = gen_rtx_PLUS (Pmode, p0, x);
4762 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4764 rot_amt = INTVAL (p1) & 15;
4765 if (INTVAL (p1) & -16)
4767 p1 = GEN_INT (INTVAL (p1) & -16);
4768 addr = gen_rtx_PLUS (SImode, p0, p1);
4773 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4777 else if (REG_P (addr))
4779 if (!reg_aligned_for_addr (addr))
4782 else if (GET_CODE (addr) == CONST)
4784 if (GET_CODE (XEXP (addr, 0)) == PLUS
4785 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4786 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4788 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4790 addr = gen_rtx_CONST (Pmode,
4791 gen_rtx_PLUS (Pmode,
4792 XEXP (XEXP (addr, 0), 0),
4793 GEN_INT (rot_amt & -16)));
4795 addr = XEXP (XEXP (addr, 0), 0);
4799 rot = gen_reg_rtx (Pmode);
4800 emit_move_insn (rot, addr);
4803 else if (GET_CODE (addr) == CONST_INT)
4805 rot_amt = INTVAL (addr);
4806 addr = GEN_INT (rot_amt & -16);
4808 else if (!ALIGNED_SYMBOL_REF_P (addr))
4810 rot = gen_reg_rtx (Pmode);
4811 emit_move_insn (rot, addr);
4814 rot_amt += extra_rotby;
4820 rtx x = gen_reg_rtx (SImode);
4821 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4825 if (!rot && rot_amt)
4826 rot = GEN_INT (rot_amt);
4828 addr0 = copy_rtx (addr);
4829 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4830 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4834 addr1 = plus_constant (copy_rtx (addr), 16);
4835 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4836 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4843 spu_split_load (rtx * ops)
4845 enum machine_mode mode = GET_MODE (ops[0]);
4846 rtx addr, load, rot;
4849 if (GET_MODE_SIZE (mode) >= 16)
4852 addr = XEXP (ops[1], 0);
4853 gcc_assert (GET_CODE (addr) != AND);
4855 if (!address_needs_split (ops[1]))
4857 ops[1] = change_address (ops[1], TImode, addr);
4858 load = gen_reg_rtx (TImode);
4859 emit_insn (gen__movti (load, ops[1]));
4860 spu_convert_move (ops[0], load);
4864 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
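/* The negative adjustment above accounts for sub-word scalars: a QImode
   value sits in byte 3 of the preferred slot and an HImode value in
   bytes 2-3, so the rotate is reduced by 4 - size bytes to leave the
   value right-justified within the first word. */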
4866 load = gen_reg_rtx (TImode);
4867 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4870 emit_insn (gen_rotqby_ti (load, load, rot));
4872 spu_convert_move (ops[0], load);
4877 spu_split_store (rtx * ops)
4879 enum machine_mode mode = GET_MODE (ops[0]);
4881 rtx addr, p0, p1, p1_lo, smem;
4885 if (GET_MODE_SIZE (mode) >= 16)
4888 addr = XEXP (ops[0], 0);
4889 gcc_assert (GET_CODE (addr) != AND);
4891 if (!address_needs_split (ops[0]))
4893 reg = gen_reg_rtx (TImode);
4894 emit_insn (gen_spu_convert (reg, ops[1]));
4895 ops[0] = change_address (ops[0], TImode, addr);
4896 emit_move_insn (ops[0], reg);
4900 if (GET_CODE (addr) == PLUS)
4903 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4904 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4905 aligned reg + aligned const => lqd, c?d, shuf, stqx
4906 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4907 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4908 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4909 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4910 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4913 p0 = XEXP (addr, 0);
4914 p1 = p1_lo = XEXP (addr, 1);
4915 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4917 p1_lo = GEN_INT (INTVAL (p1) & 15);
4918 if (reg_aligned_for_addr (p0))
4920 p1 = GEN_INT (INTVAL (p1) & -16);
4921 if (p1 == const0_rtx)
4924 addr = gen_rtx_PLUS (SImode, p0, p1);
4928 rtx x = gen_reg_rtx (SImode);
4929 emit_move_insn (x, p1);
4930 addr = gen_rtx_PLUS (SImode, p0, x);
4934 else if (REG_P (addr))
4938 p1 = p1_lo = const0_rtx;
4943 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4944 p1 = 0; /* aform doesn't use p1 */
4946 if (ALIGNED_SYMBOL_REF_P (addr))
4948 else if (GET_CODE (addr) == CONST
4949 && GET_CODE (XEXP (addr, 0)) == PLUS
4950 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4951 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4953 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4955 addr = gen_rtx_CONST (Pmode,
4956 gen_rtx_PLUS (Pmode,
4957 XEXP (XEXP (addr, 0), 0),
4958 GEN_INT (v & -16)));
4960 addr = XEXP (XEXP (addr, 0), 0);
4961 p1_lo = GEN_INT (v & 15);
4963 else if (GET_CODE (addr) == CONST_INT)
4965 p1_lo = GEN_INT (INTVAL (addr) & 15);
4966 addr = GEN_INT (INTVAL (addr) & -16);
4970 p1_lo = gen_reg_rtx (SImode);
4971 emit_move_insn (p1_lo, addr);
4975 reg = gen_reg_rtx (TImode);
4977 scalar = store_with_one_insn_p (ops[0]);
4980 /* We could copy the flags from the ops[0] MEM to mem here, but we
4981 don't because we want this load to be optimized away if
4982 possible, and copying the flags would prevent that in certain
4983 cases, e.g., consider the volatile flag. */
4985 rtx pat = gen_reg_rtx (TImode);
4986 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4987 set_mem_alias_set (lmem, 0);
4988 emit_insn (gen_movti (reg, lmem));
4990 if (!p0 || reg_aligned_for_addr (p0))
4991 p0 = stack_pointer_rtx;
4995 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4996 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5000 if (GET_CODE (ops[1]) == REG)
5001 emit_insn (gen_spu_convert (reg, ops[1]));
5002 else if (GET_CODE (ops[1]) == SUBREG)
5003 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5008 if (GET_MODE_SIZE (mode) < 4 && scalar)
5009 emit_insn (gen_ashlti3
5010 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5012 smem = change_address (ops[0], TImode, copy_rtx (addr));
5013 /* We can't use the previous alias set because the memory has changed
5014 size and can potentially overlap objects of other types. */
5015 set_mem_alias_set (smem, 0);
5017 emit_insn (gen_movti (smem, reg));
5021 /* Return TRUE if X is a MEM which is a struct member reference
5022 and the member can safely be loaded and stored with a single
5023 instruction because it is padded. */
5025 mem_is_padded_component_ref (rtx x)
5027 tree t = MEM_EXPR (x);
5029 if (!t || TREE_CODE (t) != COMPONENT_REF)
5031 t = TREE_OPERAND (t, 1);
5032 if (!t || TREE_CODE (t) != FIELD_DECL
5033 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5035 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5036 r = DECL_FIELD_CONTEXT (t);
5037 if (!r || TREE_CODE (r) != RECORD_TYPE)
5039 /* Make sure they are the same mode */
5040 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5042 /* If there are no following fields then the field alignment assures
5043 the structure is padded to the alignment, which means this field is padded too. */
5045 if (TREE_CHAIN (t) == 0)
5047 /* If the following field is also aligned then this field will be padded as well. */
5050 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5055 /* Parse the -mfixed-range= option string. */
5057 fix_range (const char *const_str)
5060 char *str, *dash, *comma;
5062 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5063 REG2 are either register names or register numbers. The effect
5064 of this option is to mark the registers in the range from REG1 to
5065 REG2 as ``fixed'' so they won't be used by the compiler. */
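/* For example, -mfixed-range=80-127 (an illustrative value, not a
   default) marks registers 80 through 127 as fixed and call-used so the
   register allocator never touches them. */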
5067 i = strlen (const_str);
5068 str = (char *) alloca (i + 1);
5069 memcpy (str, const_str, i + 1);
5073 dash = strchr (str, '-');
5076 warning (0, "value of -mfixed-range must have form REG1-REG2");
5080 comma = strchr (dash + 1, ',');
5084 first = decode_reg_name (str);
5087 warning (0, "unknown register name: %s", str);
5091 last = decode_reg_name (dash + 1);
5094 warning (0, "unknown register name: %s", dash + 1);
5102 warning (0, "%s-%s is an empty range", str, dash + 1);
5106 for (i = first; i <= last; ++i)
5107 fixed_regs[i] = call_used_regs[i] = 1;
5117 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5118 can be generated using the fsmbi instruction. */
5120 fsmbi_const_p (rtx x)
5124 /* We can always choose TImode for CONST_INT because the high bits
5125 of an SImode will always be all 1s, i.e., valid for fsmbi. */
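/* fsmbi expands a 16-bit immediate into a quadword in which each byte
   is either 0x00 or 0xff, so, for example, an SImode constant such as
   0xffff0000 or -1 qualifies while 0x12345678 does not. */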
5126 enum immediate_class c = classify_immediate (x, TImode);
5127 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5132 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5133 can be generated using the cbd, chd, cwd or cdd instruction. */
5135 cpat_const_p (rtx x, enum machine_mode mode)
5139 enum immediate_class c = classify_immediate (x, mode);
5140 return c == IC_CPAT;
5146 gen_cpat_const (rtx * ops)
5148 unsigned char dst[16];
5149 int i, offset, shift, isize;
5150 if (GET_CODE (ops[3]) != CONST_INT
5151 || GET_CODE (ops[2]) != CONST_INT
5152 || (GET_CODE (ops[1]) != CONST_INT
5153 && GET_CODE (ops[1]) != REG))
5155 if (GET_CODE (ops[1]) == REG
5156 && (!REG_POINTER (ops[1])
5157 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5160 for (i = 0; i < 16; i++)
5162 isize = INTVAL (ops[3]);
5165 else if (isize == 2)
5169 offset = (INTVAL (ops[2]) +
5170 (GET_CODE (ops[1]) ==
5171 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5172 for (i = 0; i < isize; i++)
5173 dst[offset + i] = i + shift;
5174 return array_to_constant (TImode, dst);
5177 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5178 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5179 than 16 bytes, the value is repeated across the rest of the array. */
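/* For example, a CONST_INT of 0x1234 with MODE == HImode fills the
   array with the byte pattern 0x12 0x34 repeated eight times. */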
5181 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5186 memset (arr, 0, 16);
5187 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5188 if (GET_CODE (x) == CONST_INT
5189 || (GET_CODE (x) == CONST_DOUBLE
5190 && (mode == SFmode || mode == DFmode)))
5192 gcc_assert (mode != VOIDmode && mode != BLKmode);
5194 if (GET_CODE (x) == CONST_DOUBLE)
5195 val = const_double_to_hwint (x);
5198 first = GET_MODE_SIZE (mode) - 1;
5199 for (i = first; i >= 0; i--)
5201 arr[i] = val & 0xff;
5204 /* Splat the constant across the whole array. */
5205 for (j = 0, i = first + 1; i < 16; i++)
5208 j = (j == first) ? 0 : j + 1;
5211 else if (GET_CODE (x) == CONST_DOUBLE)
5213 val = CONST_DOUBLE_LOW (x);
5214 for (i = 15; i >= 8; i--)
5216 arr[i] = val & 0xff;
5219 val = CONST_DOUBLE_HIGH (x);
5220 for (i = 7; i >= 0; i--)
5222 arr[i] = val & 0xff;
5226 else if (GET_CODE (x) == CONST_VECTOR)
5230 mode = GET_MODE_INNER (mode);
5231 units = CONST_VECTOR_NUNITS (x);
5232 for (i = 0; i < units; i++)
5234 elt = CONST_VECTOR_ELT (x, i);
5235 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5237 if (GET_CODE (elt) == CONST_DOUBLE)
5238 val = const_double_to_hwint (elt);
5241 first = GET_MODE_SIZE (mode) - 1;
5242 if (first + i * GET_MODE_SIZE (mode) > 16)
5244 for (j = first; j >= 0; j--)
5246 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5256 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5257 smaller than 16 bytes, use the bytes that would represent that value
5258 in a register, e.g., for QImode return the value of arr[3]. */
5260 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5262 enum machine_mode inner_mode;
5264 int units, size, i, j, k;
5267 if (GET_MODE_CLASS (mode) == MODE_INT
5268 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5270 j = GET_MODE_SIZE (mode);
5271 i = j < 4 ? 4 - j : 0;
5272 for (val = 0; i < j; i++)
5273 val = (val << 8) | arr[i];
5274 val = trunc_int_for_mode (val, mode);
5275 return GEN_INT (val);
5281 for (i = high = 0; i < 8; i++)
5282 high = (high << 8) | arr[i];
5283 for (i = 8, val = 0; i < 16; i++)
5284 val = (val << 8) | arr[i];
5285 return immed_double_const (val, high, TImode);
5289 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5290 val = trunc_int_for_mode (val, SImode);
5291 return hwint_to_const_double (SFmode, val);
5295 for (i = 0, val = 0; i < 8; i++)
5296 val = (val << 8) | arr[i];
5297 return hwint_to_const_double (DFmode, val);
5300 if (!VECTOR_MODE_P (mode))
5303 units = GET_MODE_NUNITS (mode);
5304 size = GET_MODE_UNIT_SIZE (mode);
5305 inner_mode = GET_MODE_INNER (mode);
5306 v = rtvec_alloc (units);
5308 for (k = i = 0; i < units; ++i)
5311 for (j = 0; j < size; j++, k++)
5312 val = (val << 8) | arr[k];
5314 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5315 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5317 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5322 return gen_rtx_CONST_VECTOR (mode, v);
5326 reloc_diagnostic (rtx x)
5329 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5332 if (GET_CODE (x) == SYMBOL_REF)
5333 decl = SYMBOL_REF_DECL (x);
5334 else if (GET_CODE (x) == CONST
5335 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5336 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5338 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5339 if (decl && !DECL_P (decl))
5342 /* The decl could be a string constant. */
5343 if (decl && DECL_P (decl))
5346 /* We use last_assemble_variable_decl to get line information. It's
5347 not always going to be right and might not even be close, but will
5348 be right for the more common cases. */
5349 if (!last_assemble_variable_decl || in_section == ctors_section)
5350 loc = DECL_SOURCE_LOCATION (decl);
5352 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5354 if (TARGET_WARN_RELOC)
5356 "creating run-time relocation for %qD", decl);
5359 "creating run-time relocation for %qD", decl);
5363 if (TARGET_WARN_RELOC)
5364 warning_at (input_location, 0, "creating run-time relocation");
5366 error_at (input_location, "creating run-time relocation");
5370 /* Hook into assemble_integer so we can generate an error for run-time
5371 relocations. The SPU ABI disallows them. */
5373 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5375 /* By default run-time relocations aren't supported, but we allow them
5376 in case users support it in their own run-time loader. And we provide
5377 a warning for those users that don't. */
5378 if ((GET_CODE (x) == SYMBOL_REF)
5379 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5380 reloc_diagnostic (x);
5382 return default_assemble_integer (x, size, aligned_p);
5386 spu_asm_globalize_label (FILE * file, const char *name)
5388 fputs ("\t.global\t", file);
5389 assemble_name (file, name);
5394 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5395 bool speed ATTRIBUTE_UNUSED)
5397 enum machine_mode mode = GET_MODE (x);
5398 int cost = COSTS_N_INSNS (2);
5400 /* Folding to a CONST_VECTOR will use extra space but there might
5401 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5402 only if it allows us to fold away multiple insns. Changing the cost
5403 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5404 because this cost will only be compared against a single insn.
5405 if (code == CONST_VECTOR)
5406 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5409 /* Use defaults for float operations. Not accurate but good enough. */
5412 *total = COSTS_N_INSNS (13);
5417 *total = COSTS_N_INSNS (6);
5423 if (satisfies_constraint_K (x))
5425 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5426 *total = COSTS_N_INSNS (1);
5428 *total = COSTS_N_INSNS (3);
5432 *total = COSTS_N_INSNS (3);
5437 *total = COSTS_N_INSNS (0);
5441 *total = COSTS_N_INSNS (5);
5445 case FLOAT_TRUNCATE:
5447 case UNSIGNED_FLOAT:
5450 *total = COSTS_N_INSNS (7);
5456 *total = COSTS_N_INSNS (9);
5463 GET_CODE (XEXP (x, 0)) ==
5464 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5465 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5467 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5469 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5470 cost = COSTS_N_INSNS (14);
5471 if ((val & 0xffff) == 0)
5472 cost = COSTS_N_INSNS (9);
5473 else if (val > 0 && val < 0x10000)
5474 cost = COSTS_N_INSNS (11);
5483 *total = COSTS_N_INSNS (20);
5490 *total = COSTS_N_INSNS (4);
5493 if (XINT (x, 1) == UNSPEC_CONVERT)
5494 *total = COSTS_N_INSNS (0);
5496 *total = COSTS_N_INSNS (4);
5499 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5500 if (GET_MODE_CLASS (mode) == MODE_INT
5501 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5502 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5503 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5508 static enum machine_mode
5509 spu_unwind_word_mode (void)
5514 /* Decide whether we can make a sibling call to a function. DECL is the
5515 declaration of the function being targeted by the call and EXP is the
5516 CALL_EXPR representing the call. */
5518 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5520 return decl && !TARGET_LARGE_MEM;
5523 /* We need to correctly update the back chain pointer and the Available
5524 Stack Size (which is in the second slot of the sp register). */
5526 spu_allocate_stack (rtx op0, rtx op1)
5529 rtx chain = gen_reg_rtx (V4SImode);
5530 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5531 rtx sp = gen_reg_rtx (V4SImode);
5532 rtx splatted = gen_reg_rtx (V4SImode);
5533 rtx pat = gen_reg_rtx (TImode);
5535 /* copy the back chain so we can save it back again. */
5536 emit_move_insn (chain, stack_bot);
5538 op1 = force_reg (SImode, op1);
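/* The shuffle pattern below (bytes 00 01 02 03 repeated) broadcasts the
   requested size, which sits in the preferred slot of op1, into all
   four words, so it can be subtracted from both the stack pointer slot
   and the available-size slot at once. */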
5540 v = 0x1020300010203ll;
5541 emit_move_insn (pat, immed_double_const (v, v, TImode));
5542 emit_insn (gen_shufb (splatted, op1, op1, pat));
5544 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5545 emit_insn (gen_subv4si3 (sp, sp, splatted));
5547 if (flag_stack_check)
5549 rtx avail = gen_reg_rtx(SImode);
5550 rtx result = gen_reg_rtx(SImode);
5551 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5552 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5553 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5556 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5558 emit_move_insn (stack_bot, chain);
5560 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5564 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5566 static unsigned char arr[16] =
5567 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
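/* This shuffle pattern broadcasts bytes 0-3 (the preferred slot) of its
   first source into every word of the result; it is used below to splat
   the computed deltas across all four slots of the stack pointer. */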
5568 rtx temp = gen_reg_rtx (SImode);
5569 rtx temp2 = gen_reg_rtx (SImode);
5570 rtx temp3 = gen_reg_rtx (V4SImode);
5571 rtx temp4 = gen_reg_rtx (V4SImode);
5572 rtx pat = gen_reg_rtx (TImode);
5573 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5575 /* Restore the backchain from the first word, sp from the second. */
5576 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5577 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5579 emit_move_insn (pat, array_to_constant (TImode, arr));
5581 /* Compute Available Stack Size for sp */
5582 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5583 emit_insn (gen_shufb (temp3, temp, temp, pat));
5585 /* Compute Available Stack Size for back chain */
5586 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5587 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5588 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5590 emit_insn (gen_addv4si3 (sp, sp, temp3));
5591 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5595 spu_init_libfuncs (void)
5597 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5598 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5599 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5600 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5601 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5602 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5603 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5604 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5605 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5606 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5607 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5609 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5610 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5612 set_optab_libfunc (smul_optab, TImode, "__multi3");
5613 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5614 set_optab_libfunc (smod_optab, TImode, "__modti3");
5615 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5616 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5617 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5620 /* Make a subreg, stripping any existing subreg. We could possibly just
5621 call simplify_subreg, but in this case we know what we want. */
5623 spu_gen_subreg (enum machine_mode mode, rtx x)
5625 if (GET_CODE (x) == SUBREG)
5627 if (GET_MODE (x) == mode)
5629 return gen_rtx_SUBREG (mode, x, 0);
5633 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5635 return (TYPE_MODE (type) == BLKmode
5637 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5638 || int_size_in_bytes (type) >
5639 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5642 /* Create the built-in types and functions */
5644 enum spu_function_code
5646 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5647 #include "spu-builtins.def"
5652 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5654 struct spu_builtin_description spu_builtins[] = {
5655 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5656 {fcode, icode, name, type, params},
5657 #include "spu-builtins.def"
5661 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5663 /* Returns the spu builtin decl for CODE. */
5666 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5668 if (code >= NUM_SPU_BUILTINS)
5669 return error_mark_node;
5671 return spu_builtin_decls[code];
5676 spu_init_builtins (void)
5678 struct spu_builtin_description *d;
5681 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5682 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5683 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5684 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5685 V4SF_type_node = build_vector_type (float_type_node, 4);
5686 V2DF_type_node = build_vector_type (double_type_node, 2);
5688 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5689 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5690 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5691 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5693 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5695 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5696 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5697 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5698 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5699 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5700 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5701 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5702 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5703 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5704 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5705 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5706 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5708 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5709 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5710 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5711 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5712 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5713 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5714 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5715 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5717 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5718 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5720 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5722 spu_builtin_types[SPU_BTI_PTR] =
5723 build_pointer_type (build_qualified_type
5725 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5727 /* For each builtin we build a new prototype. The tree code will make
5728 sure nodes are shared. */
5729 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5732 char name[64]; /* build_function will make a copy. */
5738 /* Find last parm. */
5739 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5744 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5746 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5748 sprintf (name, "__builtin_%s", d->name);
5749 spu_builtin_decls[i] =
5750 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5751 if (d->fcode == SPU_MASK_FOR_LOAD)
5752 TREE_READONLY (spu_builtin_decls[i]) = 1;
5754 /* These builtins don't throw. */
5755 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5760 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5762 static unsigned char arr[16] =
5763 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5765 rtx temp = gen_reg_rtx (Pmode);
5766 rtx temp2 = gen_reg_rtx (V4SImode);
5767 rtx temp3 = gen_reg_rtx (V4SImode);
5768 rtx pat = gen_reg_rtx (TImode);
5769 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5771 emit_move_insn (pat, array_to_constant (TImode, arr));
5773 /* Restore the sp. */
5774 emit_move_insn (temp, op1);
5775 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5777 /* Compute available stack size for sp. */
5778 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5779 emit_insn (gen_shufb (temp3, temp, temp, pat));
5781 emit_insn (gen_addv4si3 (sp, sp, temp3));
5782 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5786 spu_safe_dma (HOST_WIDE_INT channel)
5788 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5792 spu_builtin_splats (rtx ops[])
5794 enum machine_mode mode = GET_MODE (ops[0]);
5795 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5797 unsigned char arr[16];
5798 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5799 emit_move_insn (ops[0], array_to_constant (mode, arr));
5803 rtx reg = gen_reg_rtx (TImode);
5805 if (GET_CODE (ops[1]) != REG
5806 && GET_CODE (ops[1]) != SUBREG)
5807 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5813 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5819 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5824 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5829 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5835 emit_move_insn (reg, shuf);
5836 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5841 spu_builtin_extract (rtx ops[])
5843 enum machine_mode mode;
5846 mode = GET_MODE (ops[1]);
5848 if (GET_CODE (ops[2]) == CONST_INT)
5853 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5856 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5859 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5862 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5865 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5868 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5876 from = spu_gen_subreg (TImode, ops[1]);
5877 rot = gen_reg_rtx (TImode);
5878 tmp = gen_reg_rtx (SImode);
5883 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5886 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5887 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5891 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5895 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5900 emit_insn (gen_rotqby_ti (rot, from, tmp));
5902 emit_insn (gen_spu_convert (ops[0], rot));
5906 spu_builtin_insert (rtx ops[])
5908 enum machine_mode mode = GET_MODE (ops[0]);
5909 enum machine_mode imode = GET_MODE_INNER (mode);
5910 rtx mask = gen_reg_rtx (TImode);
5913 if (GET_CODE (ops[3]) == CONST_INT)
5914 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5917 offset = gen_reg_rtx (SImode);
5918 emit_insn (gen_mulsi3
5919 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5922 (mask, stack_pointer_rtx, offset,
5923 GEN_INT (GET_MODE_SIZE (imode))));
5924 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5928 spu_builtin_promote (rtx ops[])
5930 enum machine_mode mode, imode;
5931 rtx rot, from, offset;
5934 mode = GET_MODE (ops[0]);
5935 imode = GET_MODE_INNER (mode);
5937 from = gen_reg_rtx (TImode);
5938 rot = spu_gen_subreg (TImode, ops[0]);
5940 emit_insn (gen_spu_convert (from, ops[1]));
5942 if (GET_CODE (ops[2]) == CONST_INT)
5944 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5945 if (GET_MODE_SIZE (imode) < 4)
5946 pos += 4 - GET_MODE_SIZE (imode);
5947 offset = GEN_INT (pos & 15);
5951 offset = gen_reg_rtx (SImode);
5955 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5958 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5959 emit_insn (gen_addsi3 (offset, offset, offset));
5963 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5964 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5968 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5974 emit_insn (gen_rotqby_ti (rot, from, offset));
5978 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5980 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5981 rtx shuf = gen_reg_rtx (V4SImode);
5982 rtx insn = gen_reg_rtx (V4SImode);
5987 fnaddr = force_reg (SImode, fnaddr);
5988 cxt = force_reg (SImode, cxt);
5990 if (TARGET_LARGE_MEM)
5992 rtx rotl = gen_reg_rtx (V4SImode);
5993 rtx mask = gen_reg_rtx (V4SImode);
5994 rtx bi = gen_reg_rtx (SImode);
5995 static unsigned char const shufa[16] = {
5996 2, 3, 0, 1, 18, 19, 16, 17,
5997 0, 1, 2, 3, 16, 17, 18, 19
5999 static unsigned char const insna[16] = {
6001 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6003 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6006 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6007 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6009 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6010 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6011 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6012 emit_insn (gen_selb (insn, insnc, rotl, mask));
6014 mem = adjust_address (m_tramp, V4SImode, 0);
6015 emit_move_insn (mem, insn);
6017 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
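/* 0x35000000 is the encoding of the SPU "bi" (branch indirect)
   instruction and 79 << 7 places register 79 in its RA field, so the
   word stored at offset 16 of the trampoline branches through
   register 79. */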
6018 mem = adjust_address (m_tramp, Pmode, 16);
6019 emit_move_insn (mem, bi);
6023 rtx scxt = gen_reg_rtx (SImode);
6024 rtx sfnaddr = gen_reg_rtx (SImode);
6025 static unsigned char const insna[16] = {
6026 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6032 shufc = gen_reg_rtx (TImode);
6033 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6035 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6036 fits 18 bits and the last 4 are zeros. This will be true if
6037 the stack pointer is initialized to 0x3fff0 at program start,
6038 otherwise the ila instruction will be garbage. */
6040 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6041 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6043 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6044 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6045 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6047 mem = adjust_address (m_tramp, V4SImode, 0);
6048 emit_move_insn (mem, insn);
6050 emit_insn (gen_sync ());
6054 spu_expand_sign_extend (rtx ops[])
6056 unsigned char arr[16];
6057 rtx pat = gen_reg_rtx (TImode);
6060 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6061 if (GET_MODE (ops[1]) == QImode)
6063 sign = gen_reg_rtx (HImode);
6064 emit_insn (gen_extendqihi2 (sign, ops[1]));
6065 for (i = 0; i < 16; i++)
6071 for (i = 0; i < 16; i++)
6073 switch (GET_MODE (ops[1]))
6076 sign = gen_reg_rtx (SImode);
6077 emit_insn (gen_extendhisi2 (sign, ops[1]));
6079 arr[last - 1] = 0x02;
6082 sign = gen_reg_rtx (SImode);
6083 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6084 for (i = 0; i < 4; i++)
6085 arr[last - i] = 3 - i;
6088 sign = gen_reg_rtx (SImode);
6089 c = gen_reg_rtx (SImode);
6090 emit_insn (gen_spu_convert (c, ops[1]));
6091 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6092 for (i = 0; i < 8; i++)
6093 arr[last - i] = 7 - i;
6099 emit_move_insn (pat, array_to_constant (TImode, arr));
6100 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6103 /* Expand vector initialization. If there are any constant parts,
6104 load constant parts first. Then load any non-constant parts. */
6106 spu_expand_vector_init (rtx target, rtx vals)
6108 enum machine_mode mode = GET_MODE (target);
6109 int n_elts = GET_MODE_NUNITS (mode);
6111 bool all_same = true;
6112 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6115 first = XVECEXP (vals, 0, 0);
6116 for (i = 0; i < n_elts; ++i)
6118 x = XVECEXP (vals, 0, i);
6119 if (!(CONST_INT_P (x)
6120 || GET_CODE (x) == CONST_DOUBLE
6121 || GET_CODE (x) == CONST_FIXED))
6125 if (first_constant == NULL_RTX)
6128 if (i > 0 && !rtx_equal_p (x, first))
6132 /* if all elements are the same, use splats to repeat elements */
6135 if (!CONSTANT_P (first)
6136 && !register_operand (first, GET_MODE (x)))
6137 first = force_reg (GET_MODE (first), first);
6138 emit_insn (gen_spu_splats (target, first));
6142 /* load constant parts */
6143 if (n_var != n_elts)
6147 emit_move_insn (target,
6148 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6152 rtx constant_parts_rtx = copy_rtx (vals);
6154 gcc_assert (first_constant != NULL_RTX);
6155 /* Fill empty slots with the first constant; this increases
6156 our chance of using splats in the recursive call below. */
6157 for (i = 0; i < n_elts; ++i)
6159 x = XVECEXP (constant_parts_rtx, 0, i);
6160 if (!(CONST_INT_P (x)
6161 || GET_CODE (x) == CONST_DOUBLE
6162 || GET_CODE (x) == CONST_FIXED))
6163 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6166 spu_expand_vector_init (target, constant_parts_rtx);
6170 /* load variable parts */
6173 rtx insert_operands[4];
6175 insert_operands[0] = target;
6176 insert_operands[2] = target;
6177 for (i = 0; i < n_elts; ++i)
6179 x = XVECEXP (vals, 0, i);
6180 if (!(CONST_INT_P (x)
6181 || GET_CODE (x) == CONST_DOUBLE
6182 || GET_CODE (x) == CONST_FIXED))
6184 if (!register_operand (x, GET_MODE (x)))
6185 x = force_reg (GET_MODE (x), x);
6186 insert_operands[1] = x;
6187 insert_operands[3] = GEN_INT (i);
6188 spu_builtin_insert (insert_operands);
6194 /* Return the insn index of the vector compare instruction for the given CODE,
6195 DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6198 get_vec_cmp_insn (enum rtx_code code,
6199 enum machine_mode dest_mode,
6200 enum machine_mode op_mode)
6206 if (dest_mode == V16QImode && op_mode == V16QImode)
6207 return CODE_FOR_ceq_v16qi;
6208 if (dest_mode == V8HImode && op_mode == V8HImode)
6209 return CODE_FOR_ceq_v8hi;
6210 if (dest_mode == V4SImode && op_mode == V4SImode)
6211 return CODE_FOR_ceq_v4si;
6212 if (dest_mode == V4SImode && op_mode == V4SFmode)
6213 return CODE_FOR_ceq_v4sf;
6214 if (dest_mode == V2DImode && op_mode == V2DFmode)
6215 return CODE_FOR_ceq_v2df;
6218 if (dest_mode == V16QImode && op_mode == V16QImode)
6219 return CODE_FOR_cgt_v16qi;
6220 if (dest_mode == V8HImode && op_mode == V8HImode)
6221 return CODE_FOR_cgt_v8hi;
6222 if (dest_mode == V4SImode && op_mode == V4SImode)
6223 return CODE_FOR_cgt_v4si;
6224 if (dest_mode == V4SImode && op_mode == V4SFmode)
6225 return CODE_FOR_cgt_v4sf;
6226 if (dest_mode == V2DImode && op_mode == V2DFmode)
6227 return CODE_FOR_cgt_v2df;
6230 if (dest_mode == V16QImode && op_mode == V16QImode)
6231 return CODE_FOR_clgt_v16qi;
6232 if (dest_mode == V8HImode && op_mode == V8HImode)
6233 return CODE_FOR_clgt_v8hi;
6234 if (dest_mode == V4SImode && op_mode == V4SImode)
6235 return CODE_FOR_clgt_v4si;
6243 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
6244 DMODE is the expected destination mode. This is a recursive function. */
6247 spu_emit_vector_compare (enum rtx_code rcode,
6249 enum machine_mode dmode)
6253 enum machine_mode dest_mode;
6254 enum machine_mode op_mode = GET_MODE (op1);
6256 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6258 /* Floating point vector compare instructions use a V4SImode destination.
6259 Double precision floating point vector compare instructions use a V2DImode destination.
6260 Move the destination to the appropriate mode later. */
6261 if (dmode == V4SFmode)
6262 dest_mode = V4SImode;
6263 else if (dmode == V2DFmode)
6264 dest_mode = V2DImode;
6268 mask = gen_reg_rtx (dest_mode);
6269 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6271 if (vec_cmp_insn == -1)
6273 bool swap_operands = false;
6274 bool try_again = false;
6279 swap_operands = true;
6284 swap_operands = true;
6288 /* Treat A != B as ~(A==B). */
6290 enum insn_code nor_code;
6291 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6292 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6293 gcc_assert (nor_code != CODE_FOR_nothing);
6294 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6295 if (dmode != dest_mode)
6297 rtx temp = gen_reg_rtx (dest_mode);
6298 convert_move (temp, mask, 0);
6308 /* Try GT/GTU/LT/LTU OR EQ */
6311 enum insn_code ior_code;
6312 enum rtx_code new_code;
6316 case GE: new_code = GT; break;
6317 case GEU: new_code = GTU; break;
6318 case LE: new_code = LT; break;
6319 case LEU: new_code = LTU; break;
6324 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6325 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6327 ior_code = optab_handler (ior_optab, dest_mode);
6328 gcc_assert (ior_code != CODE_FOR_nothing);
6329 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6330 if (dmode != dest_mode)
6332 rtx temp = gen_reg_rtx (dest_mode);
6333 convert_move (temp, mask, 0);
6343 /* You only get two chances. */
6345 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6347 gcc_assert (vec_cmp_insn != -1);
6358 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6359 if (dmode != dest_mode)
6361 rtx temp = gen_reg_rtx (dest_mode);
6362 convert_move (temp, mask, 0);
6369 /* Emit vector conditional expression.
6370 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6371 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6374 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6375 rtx cond, rtx cc_op0, rtx cc_op1)
6377 enum machine_mode dest_mode = GET_MODE (dest);
6378 enum rtx_code rcode = GET_CODE (cond);
6381 /* Get the vector mask for the given relational operations. */
6382 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
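/* selb selects bits from its second input where the corresponding mask
   bit is set, so elements for which the comparison held come from op1
   and the rest from op2, matching the VEC_COND_EXPR semantics. */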
6384 emit_insn(gen_selb (dest, op2, op1, mask));
6390 spu_force_reg (enum machine_mode mode, rtx op)
6393 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6395 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6396 || GET_MODE (op) == BLKmode)
6397 return force_reg (mode, convert_to_mode (mode, op, 0));
6401 r = force_reg (GET_MODE (op), op);
6402 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6404 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6409 x = gen_reg_rtx (mode);
6410 emit_insn (gen_spu_convert (x, r));
6415 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6417 HOST_WIDE_INT v = 0;
6419 /* Check the range of immediate operands. */
6420 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6422 int range = p - SPU_BTI_7;
6424 if (!CONSTANT_P (op))
6425 error ("%s expects an integer literal in the range [%d, %d].",
6427 spu_builtin_range[range].low, spu_builtin_range[range].high);
6429 if (GET_CODE (op) == CONST
6430 && (GET_CODE (XEXP (op, 0)) == PLUS
6431 || GET_CODE (XEXP (op, 0)) == MINUS))
6433 v = INTVAL (XEXP (XEXP (op, 0), 1));
6434 op = XEXP (XEXP (op, 0), 0);
6436 else if (GET_CODE (op) == CONST_INT)
6438 else if (GET_CODE (op) == CONST_VECTOR
6439 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6440 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6442 /* The default for v is 0 which is valid in every range. */
6443 if (v < spu_builtin_range[range].low
6444 || v > spu_builtin_range[range].high)
6445 error ("%s expects an integer literal in the range [%d, %d]. ("
6446 HOST_WIDE_INT_PRINT_DEC ")",
6448 spu_builtin_range[range].low, spu_builtin_range[range].high,
6457 /* This is only used in lqa and stqa. Even though the insns
6458 encode 16 bits of the address (all but the 2 least
6459 significant), only 14 bits are used because it is masked to
6460 be 16-byte aligned. */
6464 /* This is used for lqr and stqr. */
6471 if (GET_CODE (op) == LABEL_REF
6472 || (GET_CODE (op) == SYMBOL_REF
6473 && SYMBOL_REF_FUNCTION_P (op))
6474 || (v & ((1 << lsbits) - 1)) != 0)
6475 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6482 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6483 rtx target, rtx ops[])
6485 enum insn_code icode = (enum insn_code) d->icode;
6488 /* Expand the arguments into rtl. */
6490 if (d->parm[0] != SPU_BTI_VOID)
6493 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6495 tree arg = CALL_EXPR_ARG (exp, a);
6498 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6501 /* The insn pattern may have additional operands (SCRATCH).
6502 Return the number of actual non-SCRATCH operands. */
6503 gcc_assert (i <= insn_data[icode].n_operands);
6508 spu_expand_builtin_1 (struct spu_builtin_description *d,
6509 tree exp, rtx target)
6513 enum insn_code icode = (enum insn_code) d->icode;
6514 enum machine_mode mode, tmode;
6519 /* Set up ops[] with values from arglist. */
6520 n_operands = expand_builtin_args (d, exp, target, ops);
6522 /* Handle the target operand which must be operand 0. */
6524 if (d->parm[0] != SPU_BTI_VOID)
6527 /* We prefer the mode specified for the match_operand; otherwise
6528 we use the mode from the builtin function prototype. */
6529 tmode = insn_data[d->icode].operand[0].mode;
6530 if (tmode == VOIDmode)
6531 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6533 /* Try to use target because not using it can lead to extra copies,
6534 and when we are using all of the registers extra copies lead to spills. */
6536 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6539 target = ops[0] = gen_reg_rtx (tmode);
6541 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6547 if (d->fcode == SPU_MASK_FOR_LOAD)
6549 enum machine_mode mode = insn_data[icode].operand[1].mode;
6554 arg = CALL_EXPR_ARG (exp, 0);
6555 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6556 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6557 addr = memory_address (mode, op);
6560 op = gen_reg_rtx (GET_MODE (addr));
6561 emit_insn (gen_rtx_SET (VOIDmode, op,
6562 gen_rtx_NEG (GET_MODE (addr), addr)));
6563 op = gen_rtx_MEM (mode, op);
6565 pat = GEN_FCN (icode) (target, op);
6572 /* Ignore align_hint, but still expand its args in case they have side effects. */
6574 if (icode == CODE_FOR_spu_align_hint)
6577 /* Handle the rest of the operands. */
6578 for (p = 1; i < n_operands; i++, p++)
6580 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6581 mode = insn_data[d->icode].operand[i].mode;
6583 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6585 /* mode can be VOIDmode here for labels */
6587 /* For specific intrinsics with an immediate operand, e.g.,
6588 si_ai(), we sometimes need to convert the scalar argument to a
6589 vector argument by splatting the scalar. */
6590 if (VECTOR_MODE_P (mode)
6591 && (GET_CODE (ops[i]) == CONST_INT
6592 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6593 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6595 if (GET_CODE (ops[i]) == CONST_INT)
6596 ops[i] = spu_const (mode, INTVAL (ops[i]));
6599 rtx reg = gen_reg_rtx (mode);
6600 enum machine_mode imode = GET_MODE_INNER (mode);
6601 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6602 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6603 if (imode != GET_MODE (ops[i]))
6604 ops[i] = convert_to_mode (imode, ops[i],
6605 TYPE_UNSIGNED (spu_builtin_types
6607 emit_insn (gen_spu_splats (reg, ops[i]));
6612 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6614 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6615 ops[i] = spu_force_reg (mode, ops[i]);
6621 pat = GEN_FCN (icode) (0);
6624 pat = GEN_FCN (icode) (ops[0]);
6627 pat = GEN_FCN (icode) (ops[0], ops[1]);
6630 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6633 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6636 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6639 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6648 if (d->type == B_CALL || d->type == B_BISLED)
6649 emit_call_insn (pat);
6650 else if (d->type == B_JUMP)
6652 emit_jump_insn (pat);
6658 return_type = spu_builtin_types[d->parm[0]];
6659 if (d->parm[0] != SPU_BTI_VOID
6660 && GET_MODE (target) != TYPE_MODE (return_type))
6662 /* target is the return value. It should always be in the mode of
6663 the builtin function prototype. */
6664 target = spu_force_reg (TYPE_MODE (return_type), target);
6671 spu_expand_builtin (tree exp,
6673 rtx subtarget ATTRIBUTE_UNUSED,
6674 enum machine_mode mode ATTRIBUTE_UNUSED,
6675 int ignore ATTRIBUTE_UNUSED)
6677 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6678 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6679 struct spu_builtin_description *d;
6681 if (fcode < NUM_SPU_BUILTINS)
6683 d = &spu_builtins[fcode];
6685 return spu_expand_builtin_1 (d, exp, target);
6690 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6692 spu_builtin_mul_widen_even (tree type)
6694 switch (TYPE_MODE (type))
6697 if (TYPE_UNSIGNED (type))
6698 return spu_builtin_decls[SPU_MULE_0];
6700 return spu_builtin_decls[SPU_MULE_1];
6707 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6709 spu_builtin_mul_widen_odd (tree type)
6711 switch (TYPE_MODE (type))
6714 if (TYPE_UNSIGNED (type))
6715 return spu_builtin_decls[SPU_MULO_1];
6717 return spu_builtin_decls[SPU_MULO_0];
6724 /* Implement targetm.vectorize.builtin_mask_for_load. */
6726 spu_builtin_mask_for_load (void)
6728 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6731 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6733 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6734 tree vectype ATTRIBUTE_UNUSED,
6735 int misalign ATTRIBUTE_UNUSED)
6737 switch (type_of_cost)
6745 case cond_branch_not_taken:
6753 /* Load + rotate. */
6756 case unaligned_load:
6759 case cond_branch_taken:
6767 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6768 after applying N iterations. This routine does not determine
6769 how many iterations are required to reach the desired alignment. */
6772 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6777 /* All other types are naturally aligned. */
6781 /* Implement targetm.vectorize.builtin_vec_perm. */
6783 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6785 *mask_element_type = unsigned_char_type_node;
6787 switch (TYPE_MODE (type))
6790 if (TYPE_UNSIGNED (type))
6791 return spu_builtin_decls[SPU_SHUFFLE_0];
6793 return spu_builtin_decls[SPU_SHUFFLE_1];
6796 if (TYPE_UNSIGNED (type))
6797 return spu_builtin_decls[SPU_SHUFFLE_2];
6799 return spu_builtin_decls[SPU_SHUFFLE_3];
6802 if (TYPE_UNSIGNED (type))
6803 return spu_builtin_decls[SPU_SHUFFLE_4];
6805 return spu_builtin_decls[SPU_SHUFFLE_5];
6808 if (TYPE_UNSIGNED (type))
6809 return spu_builtin_decls[SPU_SHUFFLE_6];
6811 return spu_builtin_decls[SPU_SHUFFLE_7];
6814 return spu_builtin_decls[SPU_SHUFFLE_8];
6817 return spu_builtin_decls[SPU_SHUFFLE_9];
6824 /* Return the appropriate mode for a named address pointer. */
6825 static enum machine_mode
6826 spu_addr_space_pointer_mode (addr_space_t addrspace)
6830 case ADDR_SPACE_GENERIC:
6839 /* Return the appropriate mode for an address in a named address space. */
6840 static enum machine_mode
6841 spu_addr_space_address_mode (addr_space_t addrspace)
6845 case ADDR_SPACE_GENERIC:
6854 /* Determine if one named address space is a subset of another. */
6857 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6859 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6860 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6862 if (subset == superset)
6865 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6866 being subsets but instead as disjoint address spaces. */
6867 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6871 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6874 /* Convert from one address space to another. */
6876 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6878 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6879 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6881 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6882 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6884 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6888 ls = gen_const_mem (DImode,
6889 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6890 set_mem_align (ls, 128);
6892 result = gen_reg_rtx (Pmode);
6893 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6894 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6895 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6896 ls, const0_rtx, Pmode, 1);
6898 emit_insn (gen_subsi3 (result, op, ls));
6903 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6907 ls = gen_const_mem (DImode,
6908 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6909 set_mem_align (ls, 128);
6911 result = gen_reg_rtx (EAmode);
6912 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6913 op = force_reg (Pmode, op);
6914 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6915 ls, const0_rtx, EAmode, 1);
6916 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6918 if (EAmode == SImode)
6919 emit_insn (gen_addsi3 (result, op, ls));
6921 emit_insn (gen_adddi3 (result, op, ls));
6931 /* Count the total number of instructions in each pipe and return the
6932 maximum, which is used as the Minimum Iteration Interval (MII)
6933 in the modulo scheduler. get_pipe() returns -2, -1, 0, or 1;
6934 -2 means the instruction can go in either pipe0 or pipe1. */
6936 spu_sms_res_mii (struct ddg *g)
6939 unsigned t[4] = {0, 0, 0, 0};
6941 for (i = 0; i < g->num_nodes; i++)
6943 rtx insn = g->nodes[i].insn;
6944 int p = get_pipe (insn) + 2;
6950 if (dump_file && INSN_P (insn))
6951 fprintf (dump_file, "i%d %s %d %d\n",
6953 insn_data[INSN_CODE(insn)].name,
6957 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
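/* Instructions that can issue in either pipe (t[0]) are assumed to
   split evenly, so the bound is the larger of the rounded-up half of
   all pipelined instructions (two can issue per cycle) and the number
   of instructions tied to a single pipe. */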
6959 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6964 spu_init_expanders (void)
6969 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6970 frame_pointer_needed is true. We don't know that until we're
6971 expanding the prologue. */
6972 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6974 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6975 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6976 to be treated as aligned, so generate them here. */
6977 r0 = gen_reg_rtx (SImode);
6978 r1 = gen_reg_rtx (SImode);
6979 mark_reg_pointer (r0, 128);
6980 mark_reg_pointer (r1, 128);
6981 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6982 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6986 static enum machine_mode
6987 spu_libgcc_cmp_return_mode (void)
6990 /* For SPU, word mode is TImode, so it is better to use SImode
6991 for compare returns. */
6995 static enum machine_mode
6996 spu_libgcc_shift_count_mode (void)
6998 /* For SPU, word mode is TImode, so it is better to use SImode
6999 for shift counts. */
7003 /* An early place to adjust some flags after GCC has finished processing them. */
7006 asm_file_start (void)
7008 /* Variable tracking should be run after all optimizations which
7009 change order of insns. It also needs a valid CFG. */
7010 spu_flag_var_tracking = flag_var_tracking;
7011 flag_var_tracking = 0;
7013 default_file_start ();
7016 /* Implement targetm.section_type_flags. */
7018 spu_section_type_flags (tree decl, const char *name, int reloc)
7020 /* .toe needs to have type @nobits. */
7021 if (strcmp (name, ".toe") == 0)
7023 /* Don't load _ea into the current address space. */
7024 if (strcmp (name, "._ea") == 0)
7025 return SECTION_WRITE | SECTION_DEBUG;
7026 return default_section_type_flags (decl, name, reloc);
7029 /* Implement targetm.select_section. */
7031 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7033 /* Variables and constants defined in the __ea address space
7034 go into a special section named "._ea". */
7035 if (TREE_TYPE (decl) != error_mark_node
7036 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7038 /* We might get called with string constants, but get_named_section
7039 doesn't like them as they are not DECLs. Also, we need to set
7040 flags in that case. */
7042 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7044 return get_named_section (decl, "._ea", reloc);
7047 return default_elf_select_section (decl, reloc, align);
7050 /* Implement targetm.unique_section. */
7052 spu_unique_section (tree decl, int reloc)
7054 /* We don't support unique section names in the __ea address space. */
7056 if (TREE_TYPE (decl) != error_mark_node
7057 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7060 default_unique_section (decl, reloc);
7063 /* Generate a constant or register which contains 2^SCALE. We assume
7064 the result is valid for MODE. Currently, MODE must be V4SFmode and
7065 SCALE must be SImode. */
7067 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7069 gcc_assert (mode == V4SFmode);
7070 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7071 if (GET_CODE (scale) != CONST_INT)
7073 /* unsigned int exp = (127 + scale) << 23;
7074 __vector float m = (__vector float) spu_splats (exp); */
7075 rtx reg = force_reg (SImode, scale);
7076 rtx exp = gen_reg_rtx (SImode);
7077 rtx mul = gen_reg_rtx (mode);
7078 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7079 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7080 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7085 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7086 unsigned char arr[16];
7087 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7088 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7089 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7090 arr[3] = arr[7] = arr[11] = arr[15] = 0;
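/* e.g., scale == 3 gives exp == 130 == 0x82, so each word becomes
   0x41000000, the single-precision encoding of 2^3 == 8.0f. */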
7091 return array_to_constant (mode, arr);
7095 /* After reload, just change the convert into a move instruction
7096 or a dead instruction. */
7098 spu_split_convert (rtx ops[])
7100 if (REGNO (ops[0]) == REGNO (ops[1]))
7101 emit_note (NOTE_INSN_DELETED);
7104 /* Use TImode always as this might help hard reg copyprop. */
7105 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7106 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7107 emit_insn (gen_move_insn (op0, op1));
7112 spu_function_profiler (FILE * file, int labelno)
7114 fprintf (file, "# profile\n");
7115 fprintf (file, "brsl $75, _mcount\n");